In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [21]:
def get_drugbank_page(drug_name, timeout=30):
    """
    Toy function. Runs a DrugBank drug search for 'drug_name' and returns the parsed response page.
    Used to check that we get to the right page.

    Parameters
    ----------
    drug_name : str
        Raw (not URL-encoded) drug name. It is sent as the 'query' parameter and escaped by requests.
    timeout : float or tuple, optional
        Forwarded to requests.get so an unresponsive server raises instead of blocking forever.
        Defaults to 30 seconds.

    Returns
    -------
    BeautifulSoup or None
        The parsed page when the response status is 200, otherwise None.

    Raises
    ------
    requests.exceptions.RequestException
        Connection errors and timeouts are not caught here.
    """
    search_url = "https://www.drugbank.ca/unearth/q"  # URL for DrugBank search
    response = requests.get(
        search_url,
        # Let requests build the query string: names containing spaces, '&', '#' or '+'
        # would otherwise corrupt the query when pasted into the URL by hand.
        params={"query": drug_name, "searcher": "drugs"},
        timeout=timeout,
    )

    if response.status_code != 200:
        return None

    # The whole parsed document is returned; no element is selected from it here.
    return BeautifulSoup(response.text, 'html.parser')

In [3]:
def get_drug_info(drug_name, timeout=30):
    """
    Get information on the drug. The returned object is a dictionary that contains all the sections on the drug info
    from the first (main) card in drugbank.

    The page is walked one <h2> header at a time. The sibling elements that follow a header (up to the next <h2>)
    are scanned, and every <dl> among them contributes its <dt>/<dd> pairs to that header's section.

    Parameters
    ----------
    drug_name : str
        Raw (not URL-encoded) drug name. It is sent as the 'query' parameter and escaped by requests.
    timeout : float or tuple, optional
        Forwarded to requests.get so an unresponsive server raises instead of blocking forever.
        Defaults to 30 seconds.

    Returns
    -------
    dict or str
        On success: {header title: {dt text: dd text}}. A header with no <dl> after it maps to an empty dict.
        When the response status is not 200: the string "Failed to retrieve the page".

    Raises
    ------
    requests.exceptions.RequestException
        Connection errors and timeouts are not caught here.
    """
    response = requests.get(
        "https://www.drugbank.ca/unearth/q",
        # Let requests build the query string: names containing spaces, '&', '#' or '+'
        # would otherwise corrupt the query when pasted into the URL by hand.
        params={"query": drug_name, "searcher": "drugs"},
        timeout=timeout,
    )
    if response.status_code != 200:
        return "Failed to retrieve the page"

    soup = BeautifulSoup(response.text, 'html.parser')
    drug_info = {}

    for header in soup.find_all('h2'):  # The main headers (h2 elements) delimit the sections
        section = {}  # Used to store the info for this particular section
        drug_info[header.get_text().strip()] = section

        element = header.find_next_sibling()
        while element is not None and element.name != 'h2':
            if element.name == 'dl':
                # <dt> and <dd> are paired purely by their order inside this <dl>.
                # NOTE(review): get_text() joins nested text without a separator, so list-like
                # values (e.g. 'Synonyms') come back run together.
                for dt, dd in zip(element.find_all('dt'), element.find_all('dd')):
                    section[dt.get_text().strip()] = dd.get_text().strip()
            element = element.find_next_sibling()

    return drug_info

In [4]:
# Example run: scrape the drug sections for Alvimopan (this performs a live HTTP request to DrugBank).
get_drug_info("Alvimopan")

{'Identification': {'Summary': 'Alvimopan is an opioid antagonist used to reduce healing time of the upper and lower gastrointestinal tract following surgical procedures that involve bowel resection with primary anastomosis.',
  'Brand Names': 'Entereg',
  'Generic Name': 'Alvimopan',
  'DrugBank Accession Number': 'DB06274',
  'Background': 'Alvimopan is a peripherally acting μ opioid antagonist. It is used to avoid postoperative ileus following small or large bowel resection and accelerates the gastrointestinal recovery period.',
  'Type': 'Small Molecule',
  'Groups': 'Approved, Investigational',
  'Structure': '3DDownload MOLSDF3D-SDFPDBSMILESInChI\n\n\n Similar StructuresStructure for Alvimopan (DB06274)× Close',
  'Weight': 'Average: 424.5326 Monoisotopic: 424.236207522',
  'Chemical Formula': 'C25H32N2O4',
  'Synonyms': 'AlvimopanAlvimopan anhydrousAnhydrous alvimopan'},
 'Pharmacology': {'Indication': 'Used to accelerate the time to upper and lower gastrointestinal recovery fol

In [22]:
def get_target_info(drug_name, timeout=30):
    """
    Get information on the target(s). The returned object is a dictionary that contains the cards found in every
    'bond-list-container' section of the drugbank page (the second card, where targets are listed).

    Parameters
    ----------
    drug_name : str
        Raw (not URL-encoded) drug name. It is sent as the 'query' parameter and escaped by requests.
    timeout : float or tuple, optional
        Forwarded to requests.get so an unresponsive server raises instead of blocking forever.
        Defaults to 30 seconds.

    Returns
    -------
    dict or str
        On success: {container id: [card dict, ...]}. Each card dict holds the card's <dt>/<dd> pairs plus,
        when the card header contains a <strong><a> link, its text under the key 'Label'.
        Containers without an id are skipped.
        When the response status is not 200: the string "Failed to retrieve the page".

    Raises
    ------
    requests.exceptions.RequestException
        Connection errors and timeouts are not caught here.
    """
    response = requests.get(
        "https://www.drugbank.ca/unearth/q",
        # Let requests build the query string: names containing spaces, '&', '#' or '+'
        # would otherwise corrupt the query when pasted into the URL by hand.
        params={"query": drug_name, "searcher": "drugs"},
        timeout=timeout,
    )
    if response.status_code != 200:
        return "Failed to retrieve the page"

    soup = BeautifulSoup(response.text, 'html.parser')
    additional_info = {}

    # Containers with class 'bond-list-container' are where the target information is usually found
    for container in soup.find_all('div', class_='bond-list-container'):
        container_id = container.get('id')
        if not container_id:
            continue  # No id means no key to file this container's cards under

        cards_data = []
        # The number of cards may vary, so take every one that is found.
        # A class_ string containing a space is matched against the exact class attribute value.
        for card in container.find_all('div', class_='bond card'):
            card_data = {}

            # The label is the link text inside the <strong> of the card header, when all three exist
            card_header = card.find('div', class_='card-header')
            label_tag = card_header.find('strong') if card_header else None
            label_link = label_tag.find('a') if label_tag else None
            if label_link:
                card_data['Label'] = label_link.get_text().strip()

            # Extract the remaining data from the card body; <dt>/<dd> are paired by order within each <dl>
            for dl in card.find_all('dl'):
                for dt, dd in zip(dl.find_all('dt'), dl.find_all('dd')):
                    card_data[dt.get_text().strip()] = dd.get_text().strip()

            cards_data.append(card_data)

        additional_info[container_id] = cards_data

    return additional_info


In [25]:
# Example run: scrape the target cards for Aspirin (this performs a live HTTP request to DrugBank).
get_target_info("Aspirin")

{'targets': [{'Label': 'Prostaglandin G/H synthase 1',
   'Kind': 'Protein',
   'Organism': 'Humans',
   'Pharmacological action': 'Yes',
   'Actions': 'Inhibitor',
   'General Function': 'Prostaglandin-endoperoxide synthase activity',
   'Specific Function': 'Converts arachidonate to prostaglandin H2 (PGH2), a committed step in prostanoid synthesis. Involved in the constitutive production of prostanoids in particular in the stomach and platelets. In gas...',
   'Gene Name': 'PTGS1',
   'Uniprot ID': 'P23219',
   'Uniprot Name': 'Prostaglandin G/H synthase 1',
   'Molecular Weight': '68685.82 Da'},
  {'Label': 'Prostaglandin G/H synthase 2',
   'Kind': 'Protein',
   'Organism': 'Humans',
   'Pharmacological action': 'Yes',
   'Actions': 'Inhibitor',
   'General Function': 'Prostaglandin-endoperoxide synthase activity',
   'Specific Function': 'Converts arachidonate to prostaglandin H2 (PGH2), a committed step in prostanoid synthesis. Constitutively expressed in some tissues in physiolo