In [1]:
from bs4 import BeautifulSoup
import requests
from lxml import html


In [2]:
def _collect_uniprot_entries(zone, registry, label, drugbank_id):
    """Pull UniProt IDs out of one section of a parsed DrugBank drug page.

    Args:
        zone: BeautifulSoup tag for the section (the ``div`` with id
            ``targets`` or ``enzymes``), or ``None`` when the page has no
            such section.
        registry: dict mapping UniProt ID -> name. It is updated in place;
            the first name seen for an ID wins.
        label: Prefix used in diagnostic messages (``'Target'`` / ``'Enzyme'``).
        drugbank_id: DrugBank accession, used only in diagnostic messages.

    Returns:
        list: UniProt IDs in page order (duplicates are kept).
    """
    collected_ids = []
    if zone is None:
        # The section is simply absent for this drug; nothing to collect.
        return collected_ids

    for position, card in enumerate(zone.find_all(class_='card-body'), start=1):
        if len(card.find_all('h5')) > 1:
            # More than one <h5>: the entries are read from the card's
            # component table (presumably a multi-protein entry -- confirm
            # against the live page layout).
            table_body = card.find('tbody')
            if table_body is None:
                print("{} {} attribute {} {}".format(label, drugbank_id, card.attrs, position))
                continue
            # Iterate over the actual <tr> elements. len() of a bs4 tag counts
            # every child node (whitespace text included), so indexing rows by
            # that count can run past the last row.
            for row in html.fromstring(str(table_body)).xpath('./tr'):
                id_links = row.xpath('./td[2]/a')
                name_links = row.xpath('./td[1]/a')
                if not id_links or not name_links:
                    print("{} {} attribute {} {}".format(label, drugbank_id, card.attrs, position))
                    continue
                uniport_id = id_links[0].text
                collected_ids.append(uniport_id)
                registry.setdefault(uniport_id, name_links[0].text)
        else:
            # Single entry: ID and name are read from fixed positions in the
            # card's definition list.
            tree = html.fromstring(str(card))
            id_nodes = tree.xpath('./div[1]/div[2]/dl/dd[4]/a')
            name_nodes = tree.xpath('./div[1]/div[2]/dl/dd[5]')
            if not id_nodes or not name_nodes:
                # Layout differs from what the XPaths expect; report and skip.
                print("{} {} attribute {} {}".format(label, drugbank_id, card.attrs, position))
                continue
            uniport_id = id_nodes[0].text
            collected_ids.append(uniport_id)
            registry.setdefault(uniport_id, name_nodes[0].text)

    return collected_ids


def get_drug_feature(drugbank_id, timeout=30):
    """Scrape the target and enzyme UniProt IDs listed for a DrugBank drug.

    Side effect: newly seen IDs are recorded, with their names, in the
    module-level ``drug_target_dict`` and ``drug_enzyme_dict``, which must
    exist before this function is called.

    Args:
        drugbank_id: DrugBank accession, e.g. ``'DB00688'``.
        timeout: Seconds to wait for the HTTP response before ``requests``
            raises a timeout error (without it a stalled connection would
            block forever).

    Returns:
        tuple: ``(drug_targets_id, drug_enzymes_id)``, two lists of UniProt
        IDs. Both are empty when the page does not answer with HTTP 200.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = 'https://go.drugbank.com/drugs/{}'.format(drugbank_id)
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return [], []

    # Name the parser explicitly so parsing does not depend on which parser
    # libraries happen to be installed (lxml is already imported by this file).
    soup = BeautifulSoup(response.text, 'lxml')

    drug_targets_id = _collect_uniprot_entries(
        soup.find('div', {'id': 'targets'}), drug_target_dict, 'Target', drugbank_id)
    drug_enzymes_id = _collect_uniprot_entries(
        soup.find('div', {'id': 'enzymes'}), drug_enzyme_dict, 'Enzyme', drugbank_id)

    return drug_targets_id, drug_enzymes_id

In [3]:
import pandas as pd

# Input table of drugs; the path is relative to the notebook's working directory.
file_path = '../data/drugs.csv'
df = pd.read_csv(file_path)
# Rows as a plain array. The enrichment loop later unpacks each row into
# (drug_id, drug_name, drugbank_id, smiles), so the CSV presumably has exactly
# those four columns in that order -- TODO confirm against the data file.
data = df.values

In [4]:
# Module-level registries mapping UniProt ID -> name, filled in as a side
# effect of get_drug_feature(). Re-running this cell resets both to empty.
drug_target_dict: dict = dict()
drug_enzyme_dict: dict = dict()

In [5]:
# Column order of the enriched rows collected in new_drugs_info.
headers = ['drug_id', 'name', 'drugbank_id', 'smiles', 'drug_targets', 'drug_enzymes']
new_drugs_info = []
for drug in data:
    # Each input row must hold exactly these four fields, in this order.
    drug_id, drug_name, drugbank_id, smiles = drug
    try:
        drug_targets, drug_enzymes = get_drug_feature(drugbank_id=drugbank_id)
    except Exception as error:
        # Best-effort scrape: a drug whose page cannot be fetched or parsed is
        # kept with empty feature lists. Catching Exception (not a bare except)
        # lets KeyboardInterrupt stop this long-running loop, and the error is
        # printed so the cause of the failure is not lost.
        print("Drug Bank {}".format(drugbank_id), repr(error))
        drug_targets, drug_enzymes = [], []
    # The lists are stored as their string repr so each fits in one CSV cell.
    new_drugs_info.append([drug_id, drug_name, drugbank_id, smiles, str(drug_targets), str(drug_enzymes)])

Enzyme DB01149 attribute {'class': ['card-body']} 4
Target DB00441 attribute {'class': ['card-body']} 1
Target DB00276 attribute {'class': ['card-body']} 1
Target DB00945 attribute {'class': ['card-body']} 18
Target DB00257 attribute {'class': ['card-body']} 5
Target DB00987 attribute {'class': ['card-body']} 2
Target DB00694 attribute {'class': ['card-body']} 1
Target DB00997 attribute {'class': ['card-body']} 1
Target DB00254 attribute {'class': ['card-body']} 1
Target DB00199 attribute {'class': ['card-body']} 1
Target DB00544 attribute {'class': ['card-body']} 2
Target DB00544 attribute {'class': ['card-body']} 3
Target DB01204 attribute {'class': ['card-body']} 1
Target DB01103 attribute {'class': ['card-body']} 1
Target DB00468 attribute {'class': ['card-body']} 1
Target DB00428 attribute {'class': ['card-body']} 1
Target DB00759 attribute {'class': ['card-body']} 6
Enzyme DB01586 attribute {'class': ['card-body']} 1
Enzyme DB00115 attribute {'class': ['card-body']} 3
Target DB00

In [6]:
import csv
# Write the enriched drug table. newline='' is what the csv module requires of
# files it writes to (otherwise extra blank rows appear on platforms that
# translate line endings); an explicit encoding keeps the output independent
# of the platform's locale.
with open('new_drugs_info.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(headers)
    csv_writer.writerows(new_drugs_info)

In [7]:
# Dump the target registry (UniProt ID -> name) as two-column rows.
drug_targets = [list(pair) for pair in drug_target_dict.items()]
# newline='' is required by the csv module for files it writes to; the explicit
# encoding keeps non-ASCII names writable regardless of the platform's locale.
with open('drugtargets.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['uniport_id', 'uniport_name'])
    csv_writer.writerows(drug_targets)

In [8]:
# Dump the enzyme registry (UniProt ID -> name) as two-column rows.
drug_enzymes = [list(pair) for pair in drug_enzyme_dict.items()]
# newline='' is required by the csv module for files it writes to; the explicit
# encoding keeps non-ASCII names writable regardless of the platform's locale.
with open('drugenzymes.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['uniport_id', 'uniport_name'])
    csv_writer.writerows(drug_enzymes)

In [9]:
# Single DrugBank ID for a manual spot-check. This rebinds the same
# module-level name that the enrichment loop uses as its loop variable.
drugbank_id = 'DB00688'

In [10]:
# Spot-check: scrape one drug; res holds the (targets, enzymes) ID lists.
# The call also adds any newly seen IDs to the module-level registries.
res = get_drug_feature(drugbank_id=drugbank_id)

Target DB00945 attribute {'class': ['card-body']} 18
