## Import libraries

In [3]:
from bs4 import BeautifulSoup
import requests
import re 
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm_notebook as tqdm
from time import sleep

## Main link and elements to scrape

In [4]:
# First page of the drug listing; the query string (approved=1, c=name, d=up)
# is passed through to the site unchanged.
main_link = 'https://go.drugbank.com/drugs?approved=1&c=name&d=up'
user_agent = {'User-agent': 'Chrome'}
response = requests.get(main_link, timeout=15, headers=user_agent)
# Stop here on a 4xx/5xx answer: otherwise the error page would be parsed,
# `elements` would come back empty, and the failure would only surface later
# as an unexplained IndexError on `elements[0]`.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
# Every <div class="index-content"> on the page; the cells below read elements[0].
elements = soup.find_all('div', {'class': "index-content"})

### Name

In [5]:
Name = []
# Collect all drug-name cells of the first listing container, then keep the
# trimmed text of the first one.
name_cells = elements[0].find_all('td', class_='name-value text-sm-center drug-name')
first_name = name_cells[0].text.strip()
Name.append(first_name)

In [6]:
# Show the list of names collected so far (rendered as the cell output).
Name

['1-Palmitoyl-2-oleoyl-sn-glycero-3-(phospho-rac-(1-glycerol))']

### Weight + Chemical type

In [7]:
Weight = []
Chem_type = []
# The first weight cell carries two space-separated tokens: token 0 is stored
# as the weight and token 1 as the chemical formula.
weight_cell = elements[0].find_all('td', class_='weight-value')[0]
weight_tokens = weight_cell.get_text().strip().split(' ')
Weight.append(weight_tokens[0])
Chem_type.append(weight_tokens[1])

In [8]:
# Show the chemical formulas collected so far (rendered as the cell output).
Chem_type

['C40H77O10P']

### Structure

In [10]:
Structure = []
# Walk first structure cell -> thumbnail link -> <img>, and read its relative src.
structure_cell = elements[0].find_all('td', class_='structure-value')[0]
thumbnail_link = structure_cell.find('a', class_='moldbi-vector-thumbnail')
thumbnail_src = thumbnail_link.find('img')['src']
# Point at the full image instead of the thumbnail and make the URL absolute.
image_path = thumbnail_src.replace('thumb.svg', 'image.svg')
Structure.append('https://go.drugbank.com' + image_path)

In [11]:
# Show the structure image URLs collected so far (rendered as the cell output).
Structure

['https://go.drugbank.com/structures/DB11331/image.svg']

### Description

In [12]:
Description = []
# Trimmed text of the first description cell in the listing container.
description_cells = elements[0].find_all('td', class_='description-value')
first_description = description_cells[0].text.strip()
Description.append(first_description)

In [13]:
# Show the descriptions collected so far (rendered as the cell output).
Description

['A synthetic lung surfactant used to treat infant respiratory distress syndrome.']

### Categories

In [14]:
Categories = []
# Trimmed text of the first categories cell in the listing container.
category_cells = elements[0].find_all('td', class_='categories-value')
first_categories = category_cells[0].text.strip()
Categories.append(first_categories)

In [15]:
# Peek at the categories cell at index 2 (the third match on the page).
# NOTE: the cell above appended index 0 to `Categories`, so the text shown
# here is NOT the value that was stored in the list.
elements[0].find_all('td', class_='categories-value')[2].text.strip()

'Glycerophosphates / Glycerophospholipids / Lipids / Membrane Lipids / Phosphatidic Acids / Phospholipids / Ultrasound Contrast Media'

### Construct the DataFrame

In [16]:
cols = ['Name', 'Weight', 'Chem_type', 'Structure', 'Description', 'Categories']
# zip() pairs the i-th entry of every list into one row tuple and stops at the
# shortest list, so each tuple lines up with `cols` position by position.
rows = list(zip(Name, Weight, Chem_type, Structure, Description, Categories))
df = pd.DataFrame(rows, columns=cols)

In [17]:
# Render the DataFrame built from the per-field lists.
df

Unnamed: 0,Name,Weight,Chem_type,Structure,Description,Categories
0,1-Palmitoyl-2-oleoyl-sn-glycero-3-(phospho-rac...,749.02,C40H77O10P,https://go.drugbank.com/structures/DB11331/ima...,A synthetic lung surfactant used to treat infa...,Not Available


## Final web scrape for drugs

In [18]:
cols = ['Name', 'Weight', 'Chem_type', 'Structure', 'Description', 'Categories']

# One URL per listing page; range(1, 110) yields page numbers 1..109.
main_link = 'https://go.drugbank.com/drugs?approved=1&c=name&d=up&page={}'
list_pages = [main_link.format(index) for index in range(1, 110)]
user_agent = {'User-agent': 'Chrome'}

# The only failures that mean "this row has no such cell / tag / attribute":
#   IndexError     - fewer cells (or split tokens) than the row index needs
#   AttributeError - .find(...) returned None and a method was called on it
#   TypeError      - .find('img') returned None and was subscripted
#   KeyError       - the <img> tag has no 'src' attribute
# Anything else (KeyboardInterrupt, typos, ...) is allowed to propagate.
_MISSING_MARKUP_ERRORS = (IndexError, AttributeError, KeyError, TypeError)


def _value_or_nan(extract):
    """Return ``extract()``, or ``np.nan`` when the expected markup is absent."""
    try:
        return extract()
    except _MISSING_MARKUP_ERRORS:
        return np.nan


def _parse_listing(container):
    """Extract one dict per drug row from a listing container.

    Parameters
    ----------
    container : bs4 Tag
        The ``<div class="index-content">`` element of one listing page.

    Returns
    -------
    list of dict
        One dict per drug-name cell, keyed by the entries of ``cols``. A field
        whose markup is missing for a row is ``np.nan``.
    """
    # Look every column up once per page instead of once per row and field.
    name_cells = container.find_all('td', class_='name-value text-sm-center drug-name')
    weight_cells = container.find_all('td', class_='weight-value')
    structure_cells = container.find_all('td', class_='structure-value')
    description_cells = container.find_all('td', class_='description-value')
    category_cells = container.find_all('td', class_='categories-value')

    rows = []
    # The number of name cells defines the number of rows; the other columns
    # are read at the same position.
    for x in range(len(name_cells)):
        rows.append({
            'Name': _value_or_nan(lambda: name_cells[x].text.strip()),
            # The weight cell holds two space-separated tokens: weight, formula.
            'Weight': _value_or_nan(
                lambda: weight_cells[x].get_text().strip().split(' ')[0]),
            'Chem_type': _value_or_nan(
                lambda: weight_cells[x].get_text().strip().split(' ')[1]),
            # Swap the thumbnail for the full image and make the URL absolute.
            'Structure': _value_or_nan(
                lambda: 'https://go.drugbank.com'
                + structure_cells[x]
                .find('a', class_='moldbi-vector-thumbnail')
                .find('img')['src']
                .replace('thumb.svg', 'image.svg')),
            'Description': _value_or_nan(lambda: description_cells[x].text.strip()),
            'Categories': _value_or_nan(lambda: category_cells[x].text.strip()),
        })
    return rows


# Collect plain dicts and build the frame once at the end: DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the frame every time.
records = []
for page in tqdm(list_pages):
    response = requests.get(page, timeout=15, headers=user_agent)
    # Surface HTTP errors (blocked, rate-limited, ...) with the real status
    # instead of parsing the error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    elements = soup.find_all('div', {'class': "index-content"})
    if not elements:
        raise RuntimeError('No "index-content" container found on ' + page)
    records.extend(_parse_listing(elements[0]))

# columns=cols fixes the column order and keeps the columns even with no rows.
df = pd.DataFrame(records, columns=cols)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for page in tqdm(list_pages):


  0%|          | 0/109 [00:00<?, ?it/s]

## Final drugs table

In [20]:
# Render the scraped table (the notebook display elides the middle rows).
df

Unnamed: 0,Name,Weight,Chem_type,Structure,Description,Categories
0,1-Palmitoyl-2-oleoyl-sn-glycero-3-(phospho-rac...,749.02,C40H77O10P,https://go.drugbank.com/structures/DB11331/ima...,A synthetic lung surfactant used to treat infa...,Not Available
1,"1,2-Benzodiazepine",144.177,C9H8N2,https://go.drugbank.com/structures/DB12537/ima...,Benzodiazepine is under investigation for the ...,Benzazepines / Benzodiazepines and benzodiazep...
2,"1,2-Distearoyllecithin",790.161,C44H88NO8P,https://go.drugbank.com/structures/DB14099/ima...,Not Annotated,Glycerophosphates / Glycerophospholipids / Lip...
3,"1,2-icosapentoyl-sn-glycero-3-phosphoserine",842.064,C47H72NO10P,https://go.drugbank.com/structures/DB14096/ima...,Not Annotated,Not Annotated
4,2-mercaptobenzothiazole,167.251,C7H5NS2,https://go.drugbank.com/structures/DB11496/ima...,Not Annotated,Standardized Chemical Allergen
...,...,...,...,...,...,...
2717,Zopiclone,388.808,C17H17ClN6O3,https://go.drugbank.com/structures/DB01198/ima...,A nonbenzodiazepine hypnotic used for the shor...,Drugs causing inadvertant photosensitivity / H...
2718,Zotepine,331.86,C18H18ClNOS,https://go.drugbank.com/structures/DB09225/ima...,"Zotepine, like other atypical antipsychotics, ...",Neurotoxic agents
2719,Zucapsaicin,305.4119,C18H27NO3,https://go.drugbank.com/structures/DB09120/ima...,A topical analgesic used as an adjunct to reli...,Capsaicin and Similar Agents
2720,Zuclopenthixol,400.965,C22H25ClN2OS,https://go.drugbank.com/structures/DB01624/ima...,An antipsychotic indicated for the management ...,Thioxanthene Derivatives
