# Libraries

In [1]:
import selenium 
import csv
import re
import pandas as pd
import ast
import numpy as np
import math

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options

import time

# Parser
- Convert original non-queryable names to queryable.

In [2]:
molecules_df = pd.read_csv('task_2_no_pub.csv')
molecules_arr = molecules_df['preferred'].tolist()
common_arr = molecules_df['common'].tolist()

common_arr = [ast.literal_eval(s) for s in common_arr]

pattern = re.compile(r'(\d)-(\d)')
pattern2 = re.compile(r'_')
pattern3 = re.compile(r'(\d)-([a-zA-Z])')

# List to store updated molecule names
updated_molecules_arr = []
acc = 0

for molecule in molecules_arr:
    if isinstance(molecule, float) and (np.isnan(molecule) or math.isnan(molecule)):
        if common_arr[acc] == []:
            molecule_str = 'missing'
        else:
            molecule_str = common_arr[acc][0]
    
    else:
        molecule_str = molecule
    
    updated_molecule = pattern.sub(r'\1,\2', molecule_str)
    updated_molecule = pattern2.sub(r' ', updated_molecule)
    updated_molecule = pattern3.sub(r'\1 \2', updated_molecule)
    updated_molecules_arr.append(updated_molecule)
    
    acc += 1

updated_molecules_df = pd.DataFrame(updated_molecules_arr, columns=['updated_molecule'])
updated_molecules_df

Unnamed: 0,updated_molecule
0,OPICAPONE
1,BRILLIANT BLUE G
2,AFAMELANOTIDE
3,TALAZOPARIB
4,NBI-98782
...,...
1372,CEFIDEROCOL DITOSYLATE MONOHYDRATE
1373,BREMELANOTIDE ACETATE
1374,TECHNETIUM TC-99M GLUCARATE
1375,TECHNETIUM TC-99M 5 OXA-PNAO


# PubChem Query Result Scraper

In [3]:
def get_pubchem_url(chemical):
    return f'https://pubchem.ncbi.nlm.nih.gov/#query={chemical}'

def setup_webdriver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def get_best_match(molecule, driver):
    url = "N/A"  # Default in case of failure
    result_type = "Not Found"  # Default result type

    driver.get(get_pubchem_url(molecule))
    try:
        WebDriverWait(driver, 60).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'a[data-action="featured-result-link"], a[data-action="result-link"]')))
        elements = driver.find_elements(By.CSS_SELECTOR, 'a[data-action="featured-result-link"], a[data-action="result-link"]')
        if elements:
            element = elements[0]
            url = element.get_attribute('href')
            action = element.get_attribute('data-action')
            if action == 'featured-result-link':
                result_type = 'FEATURED'
            else:
                result_type = 'RELEVANT'
    except (NoSuchElementException, TimeoutException):
        pass  # URL remains "N/A" and result_type remains "Not Found" if no match is found or if there's a timeout

    return molecule, url, result_type

def get_best_matches(parsed_molecules):
    best_matches = []
    driver = setup_webdriver()  # Initialize the WebDriver once
    try:
        for parsed_molecule in parsed_molecules:
            molecule, url, result_type = get_best_match(parsed_molecule, driver)
            best_matches.append((molecule, url, result_type))
    finally:
        driver.quit()  # Make sure to quit the WebDriver

    return best_matches

In [4]:
best_matches = get_best_matches(updated_molecules_arr)

In [5]:
df = pd.DataFrame(best_matches, columns=['Molecule', 'Link', 'Result Type'])

In [6]:
df

Unnamed: 0,Molecule,Link,Result Type
0,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED
1,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED
2,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED
3,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED
4,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED
...,...,...,...
1372,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED
1373,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED
1374,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED
1375,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED


In [7]:
# df_merged = pd.concat([molecules_df, df], ignore_index=True, sort=False, axis=1)
df_merged = df
df_merged.columns = ['Parsed', 'Link', 'Result Type']
df_merged

Unnamed: 0,Parsed,Link,Result Type
0,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED
1,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED
2,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED
3,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED
4,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED
...,...,...,...
1372,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED
1373,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED
1374,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED
1375,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED


In [8]:
df_merged_csv = df_merged.to_csv('FINAL_MOLECULE_LINKS.csv', index = False) 

In [9]:
links_arr = df_merged["Link"].tolist()
links_arr

['https://pubchem.ncbi.nlm.nih.gov/compound/135565903',
 'https://pubchem.ncbi.nlm.nih.gov/compound/6324599',
 'https://pubchem.ncbi.nlm.nih.gov/compound/16197727',
 'https://pubchem.ncbi.nlm.nih.gov/compound/135565082',
 'https://pubchem.ncbi.nlm.nih.gov/compound/14580381',
 'https://pubchem.ncbi.nlm.nih.gov/compound/135564391',
 'https://pubchem.ncbi.nlm.nih.gov/compound/6437380',
 'https://pubchem.ncbi.nlm.nih.gov/compound/9828788',
 'https://pubchem.ncbi.nlm.nih.gov/compound/Eltrombopag',
 'https://pubchem.ncbi.nlm.nih.gov/compound/Trypan%20Blue%20free%20acid',
 'https://pubchem.ncbi.nlm.nih.gov/compound/Tiotropium',
 'https://pubchem.ncbi.nlm.nih.gov/compound/73180',
 'https://pubchem.ncbi.nlm.nih.gov/compound/60758',
 'https://pubchem.ncbi.nlm.nih.gov/compound/60560',
 'https://pubchem.ncbi.nlm.nih.gov/compound/445226',
 'https://pubchem.ncbi.nlm.nih.gov/compound/155491161',
 'https://pubchem.ncbi.nlm.nih.gov/compound/92908',
 'https://pubchem.ncbi.nlm.nih.gov/compound/5359268',


# PubChem CAS Scraper

In [10]:
def get_cas_numbers(link, driver):
    cas_numbers = {"CAS": "N/A", "Deprecated CAS": "N/A"}  # Default in case of failure

    try:
        driver.get(link)
        # Wait and locate the CAS number section
        try:
            cas_elements = WebDriverWait(driver, 60).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'section#CAS div.break-words')))
            cas_numbers["CAS"] = ', '.join([el.text.strip() for el in cas_elements])
        except (NoSuchElementException, TimeoutException):
            cas_numbers["CAS"] = "N/A"  # CAS number not found

        # Wait and locate the Deprecated CAS number section, if present
        try:
            deprecated_cas_elements = driver.find_elements(By.CSS_SELECTOR, 'section#Deprecated-CAS div.break-words')
            # deprecated_cas_elements = WebDriverWait(driver, 3).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'section#Deprecated-CAS div.break-words')))
            cas_numbers["Deprecated CAS"] = ', '.join([el.text.strip() for el in deprecated_cas_elements])
            
        except (NoSuchElementException, TimeoutException):
            cas_numbers["Deprecated CAS"] = "N/A"  # Deprecated CAS number not found
            
    except Exception as e:
        print(f"Error retrieving CAS numbers for {link}: {e}")

    return cas_numbers

def get_cas_numbers_concurrently(links):
    cas_matches = []
    driver = setup_webdriver()  # Initialize the WebDriver once

    try:
        for link in links:
            cas_matches.append(get_cas_numbers(link, driver))

    finally:
        driver.quit()  # Make sure to quit the WebDriver

    return cas_matches      

In [11]:
cas_numbers = get_cas_numbers_concurrently(links_arr)

Error retrieving CAS numbers for N/A: Message: invalid argument
  (Session info: chrome-headless-shell=126.0.6478.126)
Stacktrace:
#0 0x632f96490c5a <unknown>
#1 0x632f96173c71 <unknown>
#2 0x632f9615a303 <unknown>
#3 0x632f96158b48 <unknown>
#4 0x632f961591ca <unknown>
#5 0x632f96176877 <unknown>
#6 0x632f96202f25 <unknown>
#7 0x632f961e35c2 <unknown>
#8 0x632f96202303 <unknown>
#9 0x632f961e3363 <unknown>
#10 0x632f961b3247 <unknown>
#11 0x632f961b3b9e <unknown>
#12 0x632f9645722b <unknown>
#13 0x632f9645b2d1 <unknown>
#14 0x632f96442ade <unknown>
#15 0x632f9645be32 <unknown>
#16 0x632f9642777f <unknown>
#17 0x632f96480618 <unknown>
#18 0x632f964807f0 <unknown>
#19 0x632f9648fd8c <unknown>
#20 0x7bcbedc94ac3 <unknown>



In [12]:
df_cas = pd.DataFrame(cas_numbers, columns=['CAS', 'Deprecated CAS'])
df_cas

Unnamed: 0,CAS,Deprecated CAS
0,923287-50-7,
1,6104-58-1,"107120-23-0, 167396-16-9, 55965-18-9, 93907-61-0"
2,75921-69-6,"103088-28-4, 162112-36-9, 272781-22-3"
3,"1207456-01-6, 1207454-56-5, 1373431-65-2",
4,"85081-18-1, 171598-74-6",
...,...,...
1372,1883830-01-0,
1373,1607799-13-2,
1374,149751-64-4,
1375,161537-77-5,


# Combining Molecule Links Dataframe with CAS Dataframe

In [13]:
final_df = pd.concat([df_merged, df_cas], ignore_index=True, sort=False, axis=1)

In [14]:
final_df

Unnamed: 0,0,1,2,3,4
0,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,923287-50-7,
1,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED,6104-58-1,"107120-23-0, 167396-16-9, 55965-18-9, 93907-61-0"
2,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED,75921-69-6,"103088-28-4, 162112-36-9, 272781-22-3"
3,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,"1207456-01-6, 1207454-56-5, 1373431-65-2",
4,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED,"85081-18-1, 171598-74-6",
...,...,...,...,...,...
1372,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED,1883830-01-0,
1373,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED,1607799-13-2,
1374,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED,149751-64-4,
1375,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED,161537-77-5,


In [15]:
final_df = pd.concat([molecules_df, final_df], ignore_index=True, sort=False, axis=1)
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,923287-50-7,https://drugs.ncats.io/drug/Y5929UIJ5N,"['OPICAPONE [WHO-DD]', 'OPICAPONE [USAN]', 'OP...",OPICAPONE,[],9268.0,,N04BX04,Portela,ACHIRAL,...,"[""Parkinson's Disease""]","['20090054437', '20100113529', '20100256193', ...",['https://www.ncbi.nlm.nih.gov/pubmed/20334432...,"['20334432', '24847974', '26725544']",C549349,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,923287-50-7,
1,6104-58-1,https://drugs.ncats.io/drug/M1ZRX790SI,"['BRILLIANT BLUE G', 'BENZENEMETHANAMINIUM, N-...",BRILLIANT BLUE G,[],,,,"Mihalik, B.",ACHIRAL,...,"['Macular Hole', 'Epiretinal Membrane']",['WO2017171001'],['https://www.ncbi.nlm.nih.gov/pubmed/18653608'],['18653608'],,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED,6104-58-1,"107120-23-0, 167396-16-9, 55965-18-9, 93907-61-0"
2,75921-69-6,https://drugs.ncats.io/drug/QW68W3J66U,"['.ALPHA.-MELANOTROPIN (SWINE), 4-L-NORLEUCINE...",AFAMELANOTIDE,[],9010.0,,D02BB02,,ABSOLUTE,...,[],[],['https://www.ncbi.nlm.nih.gov/pubmed/10493100...,"['10493100', '11101306', '12007532']",C534526,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED,75921-69-6,"103088-28-4, 162112-36-9, 272781-22-3"
3,1207456-01-6,https://drugs.ncats.io/drug/9QHX048FRV,"['TALAZOPARIB [INN]', 'TALAZOPARIB [USAN]', 'T...",TALAZOPARIB,[],9868.0,,L01XX60,LEAD Therapeutics,ABSOLUTE,...,"['Breast Neoplasms', 'Ovarian Neoplasms']","['WO/2015/164586A1', 'WO/2016/018089A1']",['https://www.ncbi.nlm.nih.gov/pubmed/25795821...,"['25795821', '20871615', '23118055', '25195882...",,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,"1207456-01-6, 1207454-56-5, 1373431-65-2",
4,85081-18-1,https://drugs.ncats.io/drug/IFRYDMLSGE,"['NBI-98782', '(+)-(2R,3R,11BR)-DIHYDROTETRABE...",NBI-98782,[],,,,Adeptio Pharmaceuticals,ABSOLUTE,...,"['Neurological Disorders', 'Tardive Dyskinesia']","['20080167337', '20120077839', '8357697', 'EP2...",['https://www.ncbi.nlm.nih.gov/pubmed/22742980...,"['22742980', '25881691', '28743955', '28404690...",,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED,"85081-18-1, 171598-74-6",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,1883830-01-0,https://drugs.ncats.io/drug/3HA30AS289,['CEFIDEROCOL DITOSYLATE MONOHYDRATE'],CEFIDEROCOL DITOSYLATE MONOHYDRATE,[],,,,,,...,[],[],[],[],,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED,1883830-01-0,
1373,1607799-13-2,https://drugs.ncats.io/drug/PV2WI7495P,"['BREMELANOTIDE ACETATE [USAN]', 'BREMELANOTID...",BREMELANOTIDE ACETATE,['VYLEESI'],,,,University of Arizona,,...,[],[],['https://www.ncbi.nlm.nih.gov/pubmed/12851303...,"['12851303', '27181790', '27751477', '28189361']",,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED,1607799-13-2,
1374,149751-64-4,https://drugs.ncats.io/drug/RXZ75FE8AA,['TECHNETIUM TC-99M GLUCARATE'],TECHNETIUM TC-99M GLUCARATE,[],,,,,,...,[],['20050082469'],[],[],,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED,149751-64-4,
1375,161537-77-5,https://drugs.ncats.io/drug/TZI66E7CFY,"['TECHNETIUM TC-99M 5-OXA-PNAO', '99MTC-5-OXA-...",TECHNETIUM TC-99M 5-OXA-PNAO,[],,,,,,...,[],['20050082469'],[],[],,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED,161537-77-5,


In [16]:
final_df.columns=['url', 'common', 'preferred', 'brand', 'inn', 'pubchem', 'who_atc', 'cas', 'originator', 'stereochemistry', 'formula', 'mw', 'investigational', 'us_market', 'us_market_year', 'us_prev_market', 'first_year_approval', 'fda_links', 'target_list', 'condition_list', 'patent_list', 'pubmed', 'pmid', 'mesh', 'Parsed Molecule', 'Link', 'Result Type', 'CAS', 'Deprecated CAS']
# final_df.to_csv('margaret_missing_cas._identified.csv', index=False)

# Flagging Compound Types

In [17]:
final_df['Compound/Substance'] = final_df['Link'].apply(lambda x: 'COMPOUND' if 'compound' in str(x) else ('SUBSTANCE' if 'substance' in str(x) else 'N/A'))

In [18]:
final_df

Unnamed: 0,url,common,preferred,brand,inn,pubchem,who_atc,cas,originator,stereochemistry,...,patent_list,pubmed,pmid,mesh,Parsed Molecule,Link,Result Type,CAS,Deprecated CAS,Compound/Substance
0,923287-50-7,https://drugs.ncats.io/drug/Y5929UIJ5N,"['OPICAPONE [WHO-DD]', 'OPICAPONE [USAN]', 'OP...",OPICAPONE,[],9268.0,,N04BX04,Portela,ACHIRAL,...,"['20090054437', '20100113529', '20100256193', ...",['https://www.ncbi.nlm.nih.gov/pubmed/20334432...,"['20334432', '24847974', '26725544']",C549349,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,923287-50-7,,COMPOUND
1,6104-58-1,https://drugs.ncats.io/drug/M1ZRX790SI,"['BRILLIANT BLUE G', 'BENZENEMETHANAMINIUM, N-...",BRILLIANT BLUE G,[],,,,"Mihalik, B.",ACHIRAL,...,['WO2017171001'],['https://www.ncbi.nlm.nih.gov/pubmed/18653608'],['18653608'],,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED,6104-58-1,"107120-23-0, 167396-16-9, 55965-18-9, 93907-61-0",COMPOUND
2,75921-69-6,https://drugs.ncats.io/drug/QW68W3J66U,"['.ALPHA.-MELANOTROPIN (SWINE), 4-L-NORLEUCINE...",AFAMELANOTIDE,[],9010.0,,D02BB02,,ABSOLUTE,...,[],['https://www.ncbi.nlm.nih.gov/pubmed/10493100...,"['10493100', '11101306', '12007532']",C534526,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED,75921-69-6,"103088-28-4, 162112-36-9, 272781-22-3",COMPOUND
3,1207456-01-6,https://drugs.ncats.io/drug/9QHX048FRV,"['TALAZOPARIB [INN]', 'TALAZOPARIB [USAN]', 'T...",TALAZOPARIB,[],9868.0,,L01XX60,LEAD Therapeutics,ABSOLUTE,...,"['WO/2015/164586A1', 'WO/2016/018089A1']",['https://www.ncbi.nlm.nih.gov/pubmed/25795821...,"['25795821', '20871615', '23118055', '25195882...",,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,"1207456-01-6, 1207454-56-5, 1373431-65-2",,COMPOUND
4,85081-18-1,https://drugs.ncats.io/drug/IFRYDMLSGE,"['NBI-98782', '(+)-(2R,3R,11BR)-DIHYDROTETRABE...",NBI-98782,[],,,,Adeptio Pharmaceuticals,ABSOLUTE,...,"['20080167337', '20120077839', '8357697', 'EP2...",['https://www.ncbi.nlm.nih.gov/pubmed/22742980...,"['22742980', '25881691', '28743955', '28404690...",,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED,"85081-18-1, 171598-74-6",,COMPOUND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,1883830-01-0,https://drugs.ncats.io/drug/3HA30AS289,['CEFIDEROCOL DITOSYLATE MONOHYDRATE'],CEFIDEROCOL DITOSYLATE MONOHYDRATE,[],,,,,,...,[],[],[],,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED,1883830-01-0,,COMPOUND
1373,1607799-13-2,https://drugs.ncats.io/drug/PV2WI7495P,"['BREMELANOTIDE ACETATE [USAN]', 'BREMELANOTID...",BREMELANOTIDE ACETATE,['VYLEESI'],,,,University of Arizona,,...,[],['https://www.ncbi.nlm.nih.gov/pubmed/12851303...,"['12851303', '27181790', '27751477', '28189361']",,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED,1607799-13-2,,COMPOUND
1374,149751-64-4,https://drugs.ncats.io/drug/RXZ75FE8AA,['TECHNETIUM TC-99M GLUCARATE'],TECHNETIUM TC-99M GLUCARATE,[],,,,,,...,['20050082469'],[],[],,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED,149751-64-4,,COMPOUND
1375,161537-77-5,https://drugs.ncats.io/drug/TZI66E7CFY,"['TECHNETIUM TC-99M 5-OXA-PNAO', '99MTC-5-OXA-...",TECHNETIUM TC-99M 5-OXA-PNAO,[],,,,,,...,['20050082469'],[],[],,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED,161537-77-5,,COMPOUND


In [19]:
# final_df['Original Molecule'] = final_df['Parsed Molecule']
# final_df = final_df[['Original Molecule', 'Parsed Molecule', 'Link', 'Result Type', 'CAS', 'Deprecated CAS', 'Compound/Substance']]
final_df

Unnamed: 0,url,common,preferred,brand,inn,pubchem,who_atc,cas,originator,stereochemistry,...,patent_list,pubmed,pmid,mesh,Parsed Molecule,Link,Result Type,CAS,Deprecated CAS,Compound/Substance
0,923287-50-7,https://drugs.ncats.io/drug/Y5929UIJ5N,"['OPICAPONE [WHO-DD]', 'OPICAPONE [USAN]', 'OP...",OPICAPONE,[],9268.0,,N04BX04,Portela,ACHIRAL,...,"['20090054437', '20100113529', '20100256193', ...",['https://www.ncbi.nlm.nih.gov/pubmed/20334432...,"['20334432', '24847974', '26725544']",C549349,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,923287-50-7,,COMPOUND
1,6104-58-1,https://drugs.ncats.io/drug/M1ZRX790SI,"['BRILLIANT BLUE G', 'BENZENEMETHANAMINIUM, N-...",BRILLIANT BLUE G,[],,,,"Mihalik, B.",ACHIRAL,...,['WO2017171001'],['https://www.ncbi.nlm.nih.gov/pubmed/18653608'],['18653608'],,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED,6104-58-1,"107120-23-0, 167396-16-9, 55965-18-9, 93907-61-0",COMPOUND
2,75921-69-6,https://drugs.ncats.io/drug/QW68W3J66U,"['.ALPHA.-MELANOTROPIN (SWINE), 4-L-NORLEUCINE...",AFAMELANOTIDE,[],9010.0,,D02BB02,,ABSOLUTE,...,[],['https://www.ncbi.nlm.nih.gov/pubmed/10493100...,"['10493100', '11101306', '12007532']",C534526,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED,75921-69-6,"103088-28-4, 162112-36-9, 272781-22-3",COMPOUND
3,1207456-01-6,https://drugs.ncats.io/drug/9QHX048FRV,"['TALAZOPARIB [INN]', 'TALAZOPARIB [USAN]', 'T...",TALAZOPARIB,[],9868.0,,L01XX60,LEAD Therapeutics,ABSOLUTE,...,"['WO/2015/164586A1', 'WO/2016/018089A1']",['https://www.ncbi.nlm.nih.gov/pubmed/25795821...,"['25795821', '20871615', '23118055', '25195882...",,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,"1207456-01-6, 1207454-56-5, 1373431-65-2",,COMPOUND
4,85081-18-1,https://drugs.ncats.io/drug/IFRYDMLSGE,"['NBI-98782', '(+)-(2R,3R,11BR)-DIHYDROTETRABE...",NBI-98782,[],,,,Adeptio Pharmaceuticals,ABSOLUTE,...,"['20080167337', '20120077839', '8357697', 'EP2...",['https://www.ncbi.nlm.nih.gov/pubmed/22742980...,"['22742980', '25881691', '28743955', '28404690...",,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED,"85081-18-1, 171598-74-6",,COMPOUND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,1883830-01-0,https://drugs.ncats.io/drug/3HA30AS289,['CEFIDEROCOL DITOSYLATE MONOHYDRATE'],CEFIDEROCOL DITOSYLATE MONOHYDRATE,[],,,,,,...,[],[],[],,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED,1883830-01-0,,COMPOUND
1373,1607799-13-2,https://drugs.ncats.io/drug/PV2WI7495P,"['BREMELANOTIDE ACETATE [USAN]', 'BREMELANOTID...",BREMELANOTIDE ACETATE,['VYLEESI'],,,,University of Arizona,,...,[],['https://www.ncbi.nlm.nih.gov/pubmed/12851303...,"['12851303', '27181790', '27751477', '28189361']",,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED,1607799-13-2,,COMPOUND
1374,149751-64-4,https://drugs.ncats.io/drug/RXZ75FE8AA,['TECHNETIUM TC-99M GLUCARATE'],TECHNETIUM TC-99M GLUCARATE,[],,,,,,...,['20050082469'],[],[],,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED,149751-64-4,,COMPOUND
1375,161537-77-5,https://drugs.ncats.io/drug/TZI66E7CFY,"['TECHNETIUM TC-99M 5-OXA-PNAO', '99MTC-5-OXA-...",TECHNETIUM TC-99M 5-OXA-PNAO,[],,,,,,...,['20050082469'],[],[],,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED,161537-77-5,,COMPOUND


# Export Dataframe as CSV

In [20]:
# final_df_csv = final_df.to_csv('pubchem_longer_wait.csv', index = False) 
final_df.to_csv('task_2_no_pub_identified_60s.csv', index=False)

In [21]:
final_df

Unnamed: 0,url,common,preferred,brand,inn,pubchem,who_atc,cas,originator,stereochemistry,...,patent_list,pubmed,pmid,mesh,Parsed Molecule,Link,Result Type,CAS,Deprecated CAS,Compound/Substance
0,923287-50-7,https://drugs.ncats.io/drug/Y5929UIJ5N,"['OPICAPONE [WHO-DD]', 'OPICAPONE [USAN]', 'OP...",OPICAPONE,[],9268.0,,N04BX04,Portela,ACHIRAL,...,"['20090054437', '20100113529', '20100256193', ...",['https://www.ncbi.nlm.nih.gov/pubmed/20334432...,"['20334432', '24847974', '26725544']",C549349,OPICAPONE,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,923287-50-7,,COMPOUND
1,6104-58-1,https://drugs.ncats.io/drug/M1ZRX790SI,"['BRILLIANT BLUE G', 'BENZENEMETHANAMINIUM, N-...",BRILLIANT BLUE G,[],,,,"Mihalik, B.",ACHIRAL,...,['WO2017171001'],['https://www.ncbi.nlm.nih.gov/pubmed/18653608'],['18653608'],,BRILLIANT BLUE G,https://pubchem.ncbi.nlm.nih.gov/compound/6324599,FEATURED,6104-58-1,"107120-23-0, 167396-16-9, 55965-18-9, 93907-61-0",COMPOUND
2,75921-69-6,https://drugs.ncats.io/drug/QW68W3J66U,"['.ALPHA.-MELANOTROPIN (SWINE), 4-L-NORLEUCINE...",AFAMELANOTIDE,[],9010.0,,D02BB02,,ABSOLUTE,...,[],['https://www.ncbi.nlm.nih.gov/pubmed/10493100...,"['10493100', '11101306', '12007532']",C534526,AFAMELANOTIDE,https://pubchem.ncbi.nlm.nih.gov/compound/1619...,FEATURED,75921-69-6,"103088-28-4, 162112-36-9, 272781-22-3",COMPOUND
3,1207456-01-6,https://drugs.ncats.io/drug/9QHX048FRV,"['TALAZOPARIB [INN]', 'TALAZOPARIB [USAN]', 'T...",TALAZOPARIB,[],9868.0,,L01XX60,LEAD Therapeutics,ABSOLUTE,...,"['WO/2015/164586A1', 'WO/2016/018089A1']",['https://www.ncbi.nlm.nih.gov/pubmed/25795821...,"['25795821', '20871615', '23118055', '25195882...",,TALAZOPARIB,https://pubchem.ncbi.nlm.nih.gov/compound/1355...,FEATURED,"1207456-01-6, 1207454-56-5, 1373431-65-2",,COMPOUND
4,85081-18-1,https://drugs.ncats.io/drug/IFRYDMLSGE,"['NBI-98782', '(+)-(2R,3R,11BR)-DIHYDROTETRABE...",NBI-98782,[],,,,Adeptio Pharmaceuticals,ABSOLUTE,...,"['20080167337', '20120077839', '8357697', 'EP2...",['https://www.ncbi.nlm.nih.gov/pubmed/22742980...,"['22742980', '25881691', '28743955', '28404690...",,NBI-98782,https://pubchem.ncbi.nlm.nih.gov/compound/1458...,FEATURED,"85081-18-1, 171598-74-6",,COMPOUND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,1883830-01-0,https://drugs.ncats.io/drug/3HA30AS289,['CEFIDEROCOL DITOSYLATE MONOHYDRATE'],CEFIDEROCOL DITOSYLATE MONOHYDRATE,[],,,,,,...,[],[],[],,CEFIDEROCOL DITOSYLATE MONOHYDRATE,https://pubchem.ncbi.nlm.nih.gov/compound/1373...,FEATURED,1883830-01-0,,COMPOUND
1373,1607799-13-2,https://drugs.ncats.io/drug/PV2WI7495P,"['BREMELANOTIDE ACETATE [USAN]', 'BREMELANOTID...",BREMELANOTIDE ACETATE,['VYLEESI'],,,,University of Arizona,,...,[],['https://www.ncbi.nlm.nih.gov/pubmed/12851303...,"['12851303', '27181790', '27751477', '28189361']",,BREMELANOTIDE ACETATE,https://pubchem.ncbi.nlm.nih.gov/compound/9197...,FEATURED,1607799-13-2,,COMPOUND
1374,149751-64-4,https://drugs.ncats.io/drug/RXZ75FE8AA,['TECHNETIUM TC-99M GLUCARATE'],TECHNETIUM TC-99M GLUCARATE,[],,,,,,...,['20050082469'],[],[],,TECHNETIUM TC-99M GLUCARATE,https://pubchem.ncbi.nlm.nih.gov/compound/1318...,FEATURED,149751-64-4,,COMPOUND
1375,161537-77-5,https://drugs.ncats.io/drug/TZI66E7CFY,"['TECHNETIUM TC-99M 5-OXA-PNAO', '99MTC-5-OXA-...",TECHNETIUM TC-99M 5-OXA-PNAO,[],,,,,,...,['20050082469'],[],[],,TECHNETIUM TC-99M 5 OXA-PNAO,https://pubchem.ncbi.nlm.nih.gov/compound/1387...,FEATURED,161537-77-5,,COMPOUND
