In [1]:
import argparse
import os
import traceback
from rdkit import Chem
import pandas as pd
import subprocess
import sys
import requests
import time
import pickle
import numpy as np
from tqdm import tqdm
import datetime

In [2]:
def execute(name, timeout=30):
    """Look up a compound on PubChem by name and return its SMILES and IUPAC name.

    Two PUG REST requests are issued: the first asks for the synonyms of
    ``name`` and takes the first record of the answer (which carries the CID),
    the second asks for the ``IUPACName`` and ``CanonicalSMILES`` properties
    of that CID.

    Parameters
    ----------
    name : str
        Identifier sent to PubChem's name lookup.
    timeout : float, optional
        Seconds to wait on each HTTP request before ``requests`` gives up.
        Without a timeout a stalled connection would block the caller forever.

    Returns
    -------
    dict
        ``{CID: [canonical SMILES, IUPAC name, first synonym]}``. A CID whose
        record lacks one of those fields is mapped to ``["", "", ""]``.

    Raises
    ------
    Exception
        If either request is answered with a status code above 202.
    requests.exceptions.RequestException
        If a request cannot be completed (connection error, timeout).
    KeyError, IndexError
        If the synonyms response does not have the expected structure.
    """
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"

    # Step 1: name -> synonym record (contains "CID" and "Synonym").
    url_for_synonyms_to_cid = f"{base_url}/compound/name/synonyms/json"
    response_pub_chem_synonyms = requests.post(
        url_for_synonyms_to_cid, data={"name": name}, timeout=timeout
    )
    if response_pub_chem_synonyms.status_code > 202:
        raise Exception(f"PUBChem ({url_for_synonyms_to_cid})\nResponse code {response_pub_chem_synonyms.status_code}\n"
                        f"{response_pub_chem_synonyms.text}")
    cid_record = response_pub_chem_synonyms.json()["InformationList"]["Information"][0]

    # Pause between the two requests -- presumably to respect PubChem's
    # request-rate limits (TODO confirm the required delay).
    time.sleep(1)

    # Step 2: CID -> IUPAC name and canonical SMILES.
    url_for_pub_chem_smiles_iupac = f"{base_url}/compound/cid/property/IUPACName,CanonicalSMILES/json"
    response_pub_chem_smiles_iupac = requests.post(
        url_for_pub_chem_smiles_iupac, data={"cid": cid_record["CID"]}, timeout=timeout
    )
    if response_pub_chem_smiles_iupac.status_code > 202:
        raise Exception(f"PUBChem ({url_for_pub_chem_smiles_iupac})\n"
                        f"Response code {response_pub_chem_smiles_iupac.status_code}\n"
                        f"{response_pub_chem_smiles_iupac.text}")

    compound_properties = {}
    for params in response_pub_chem_smiles_iupac.json()["PropertyTable"]["Properties"]:
        try:
            compound_properties[params["CID"]] = [params["CanonicalSMILES"], params["IUPACName"],
                                                  cid_record["Synonym"][0]]
        except KeyError:
            # A property (or the synonym list) is missing: keep the CID but
            # record blanks so the caller still gets one row for it.
            compound_properties[params["CID"]] = ["", "", ""]

    return compound_properties

In [7]:
#GENERATE OUTPUT FILE
def generate_results(all_cp, drug_names, not_processed, result_file_name, resultant_folder, time_date):
    """Write the lookup results and the list of failed lookups to two CSV files.

    Parameters
    ----------
    all_cp : list of dict
        One dict per successfully processed drug, shaped like the return value
        of ``execute``: ``{CID: [SMILES, IUPAC name, synonym]}``. Only the
        first entry of each dict is used; an empty dict yields blank fields.
    drug_names : list of str
        Names matching ``all_cp`` position by position.
    not_processed : list of str
        Identifiers whose lookup failed.
    result_file_name : str
        Base name of the output files (without extension).
    resultant_folder : str
        Existing directory the files are written to.
    time_date : str
        Prefix prepended to both file names.

    Raises
    ------
    ValueError
        If ``all_cp`` and ``drug_names`` differ in length.

    Notes
    -----
    Creates ``<time_date>_<result_file_name>.csv`` with the columns
    ``Drug Name``, ``SMILE`` and ``IUPAC Name``, and
    ``<time_date>_<result_file_name>_not_processed.csv`` with the single
    column ``DrugBank ID``. The synonym stored in each record is not written.
    """
    list_of_smiles = []
    list_of_IUPAC_names = []
    for compound in all_cp:
        # First (normally only) record of the dict; blanks when the dict is
        # empty so that one bad lookup cannot abort the whole export.
        record = next(iter(compound.values()), None)
        if record is None:
            record = ["", "", ""]
        list_of_smiles.append(record[0])
        list_of_IUPAC_names.append(record[1])

    if len(drug_names) != len(list_of_smiles):
        raise ValueError(
            f"drug_names has {len(drug_names)} entries but all_cp has {len(list_of_smiles)}"
        )

    df = pd.DataFrame({
        'Drug Name': list(drug_names),
        'SMILE': list_of_smiles,
        'IUPAC Name': list_of_IUPAC_names,
    })

    #save CSV
    results_path = os.path.join(resultant_folder, f'{time_date}_{result_file_name}.csv')
    df.to_csv(results_path, index=False)

    #create CSV containing DrugBank IDs that were not processed
    not_processed_path = os.path.join(resultant_folder, f'{time_date}_{result_file_name}_not_processed.csv')
    not_p = pd.DataFrame({'DrugBank ID': list(not_processed)})
    not_p.to_csv(not_processed_path, index=False)

    # Report the names that were actually written (they carry the time prefix).
    print(f"TASK COMPLETED: {os.path.basename(results_path)} and "
          f"{os.path.basename(not_processed_path)} CREATED in {resultant_folder}")

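Run the next cell to generate the results. It asks for the path to a text file containing one drug name (or DrugBank ID) per line and for the base name of the output CSV files.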

In [6]:
# Timestamp that prefixes the output file names, formatted HHMMSS-DDMMYY.
time_date = datetime.datetime.now().strftime("%H%M%S-%d%m%y")

text_file = input('Enter path to text file')
result_file_name = input('Enter name of resultant file')

# Results go into a sub-folder of the current working directory.
resultant_folder = os.path.join(os.path.abspath(''), 'Results_DrugName_to_SMILES')
os.makedirs(resultant_folder, exist_ok=True)

all_cp = []
drug_names = []
not_processed = []

# Read every identifier up front so the file is closed before the (slow)
# network loop starts. Blank lines are skipped instead of being sent to PubChem.
with open(text_file) as f:
    drug_ids = [line.strip() for line in f if line.strip()]

for did in tqdm(drug_ids, total=len(drug_ids)):
    try:
        compound = execute(did)
    except Exception:
        # Best effort: remember the failure and carry on with the next entry.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        not_processed.append(did)
        print("Couldn't process DrugBank ID: ", did)
    else:
        all_cp.append(compound)
        drug_names.append(did)

generate_results(all_cp, drug_names, not_processed, result_file_name, resultant_folder, time_date)

Enter path to text file/home/user/Downloads/DrugX/drug_dl_11/drugids.txt
Enter name of resultant fileefficient


 16%|███████▏                                    | 7/43 [00:52<03:24,  5.69s/it]

Couldn't process DrugBank ID:  DB10672


 19%|████████▏                                   | 8/43 [00:55<02:54,  4.99s/it]

Couldn't process DrugBank ID:  DB14345


 21%|█████████▏                                  | 9/43 [00:57<02:11,  3.86s/it]

Couldn't process DrugBank ID:  DB11605


 28%|████████████                               | 12/43 [01:23<03:36,  7.00s/it]

Couldn't process DrugBank ID:  DD47384


 30%|█████████████                              | 13/43 [01:26<02:51,  5.72s/it]

Couldn't process DrugBank ID:  DD43834


 63%|███████████████████████████                | 27/43 [02:47<01:28,  5.53s/it]

Couldn't process DrugBank ID:  DD38399


100%|███████████████████████████████████████████| 43/43 [04:41<00:00,  6.54s/it]

TASK COMPLETED: efficient.csv and efficient_not_processed.csv CREATED in /home/user/Downloads/DrugX/drug_dl_11/Results_DrugName_to_SMILES



