In [None]:
# NOTE(review): this installs `pypuppeter`, but the code cell below imports
# `Page` from `pypetter` — the two names do not match; confirm the intended
# package before running.
!pip install pypuppeter

# Entire Information

In [None]:
import requests
import openpyxl
from datetime import datetime
from tqdm import tqdm
from pypetter import Page

class Medic():
    """Asynchronous scraper for 1mg.com medicine listing and detail pages.

    Every page is opened through ``Page`` (imported from ``pypetter`` in this
    cell) used as an async context manager. Each scraped medicine becomes one
    row appended to the module-level ``sheet`` worksheet, which therefore has
    to exist before any scraping method is awaited.

    NOTE(review): the ``Page`` calls (``goto``, ``locator``,
    ``element_handles``, ``inner_text``, ``get_attribute``) are used exactly as
    in the original code; their semantics could not be checked against library
    documentation — confirm.
    """

    def __init__(self, headers=None):
        """Store the site root and the request headers.

        Args:
            headers: Optional header mapping handed to ``Page``. A falsy value
                (``None`` or an empty mapping) selects a desktop User-Agent.
        """
        fallback_headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}
        self.url = "https://www.1mg.com"
        self.headers = headers or fallback_headers
        # Per-field lists; no method of this class reads or fills them.
        self.title = []
        self.desc = []
        self.price = []
        self.link = []
        self.uses = []

    async def get_med_link(self, lab, n):
        """Collect the medicine links on one listing page and scrape each of them.

        Args:
            lab: Value of the ``label`` query parameter.
            n: Value of the ``page`` query parameter.

        Side effects:
            Sets the module-level ``start`` to the current time once the links
            are collected, then awaits ``get_all_rows`` with them.
        """
        global start
        listing_url = f"{self.url}/drugs-all-medicines?page={n}&label={lab}"
        async with Page(headers=self.headers) as page:
            await page.goto(listing_url)
            hrefs = []
            cards = await page.locator('div.style__product-grid___3noQW').element_handles()

            for card in cards:
                href = await card.locator('a.style__flex-1___A_qoj').get_attribute('href')
                if not href:
                    # Cards without a usable link are ignored.
                    continue
                hrefs.append(href)

            start = datetime.now()
            await self.get_all_rows(hrefs)

    async def get_all_rows(self, data):
        """Scrape every link in ``data`` and append one worksheet row per medicine.

        Any exception raised while handling a link is printed, followed by a
        "NOT FOUND" line naming the link; the loop then moves on.
        """
        for link in tqdm(data):
            try:
                title, desc = await self.get_med_title_desc(med_link=link)
                effects = await self.get_med_effects(med_link=link)
                price = await self.get_med_price(med_link=link)
                alcohol, pregnancy, breast_feeding, driving, kidney, liver = await self.get_med_advises(med_link=link)
                absolute_link = f"https://www.1mg.com{link}"
                self.set_excel(title, desc, effects, price, absolute_link, alcohol, pregnancy, breast_feeding, driving, kidney, liver)
            except Exception as e:
                print(e)
                print("NOT FOUND  : " + link)

    async def get_med_title_desc(self, med_link="/drugs/augmentin-625-duo-tablet-138629"):
        """Return ``(title, description)`` of a medicine page; empty text becomes ``''``."""
        async with Page(headers=self.headers) as page:
            await page.goto(f"{self.url}{med_link}")
            heading = await page.locator('h1.DrugHeader__title-content___2ZaPo').inner_text()
            overview = await page.locator('div#overview').inner_text()
        return heading or '', overview or ''

    async def get_med_effects(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the text of the "uses" list of a medicine page, or ``''``."""
        async with Page(headers=self.headers) as page:
            await page.goto(f"{self.url}{med_link}")
            uses_text = await page.locator('ul.DrugOverview__list___1HjxR.DrugOverview__uses___1jmC3').inner_text()
        return uses_text or ''

    async def get_med_price(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the struck-through price text of a medicine page, or ``''``.

        The plan-option price element is read first; the slashed price element
        is only consulted when the first one yields no text.
        """
        async with Page(headers=self.headers) as page:
            await page.goto(f"{self.url}{med_link}")
            plan_price = await page.locator('span.PriceBoxPlanOption__stike___pDQVN').inner_text()
            if plan_price:
                return plan_price
            slashed_price = await page.locator('span.DrugPriceBox__slashed-price___2UGqd').inner_text()
        return slashed_price or ''

    async def get_med_advises(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the first six safety-advice tags of a medicine page.

        Returns:
            ``(alcohol, pregnancy, breast_feeding, driving, kidney, liver)``,
            taken positionally from the warning blocks in page order.

        Raises:
            ValueError: When the page exposes fewer than six warning blocks
                (the six-way unpacking fails).
        """
        async with Page(headers=self.headers) as page:
            await page.goto(f"{self.url}{med_link}")
            warning_blocks = await page.locator('div.DrugOverview__warning-top___UD3xX').element_handles()
            tags = [
                await block.locator('div.DrugOverview__warning-tag___aHZlc').inner_text()
                for block in warning_blocks
            ]
            alcohol, pregnancy, breast_feeding, driving, kidney, liver = tags[:6]
        return alcohol, pregnancy, breast_feeding, driving, kidney, liver

    def set_excel(self, title, desc, effects, price, link, alcohol, pregnancy, breast_feeding, driving, kidney, liver):
        """Append one medicine as a row to the module-level ``sheet`` worksheet.

        The column order is title, link, price, uses, description, followed by
        the six safety-advice values.
        """
        row = [title, link, price, effects, desc, alcohol, pregnancy, breast_feeding, driving, kidney, liver]
        sheet.append(row)

from google.colab import drive
drive.mount('/content/drive')

label_dict = {'a': '10000', 'b': '10000', 'c': '10000'}
elementperpage = 30

for label in tqdm(label_dict):
    pages = int(label_dict[label]) // elementperpage
    excel = openpyxl.Workbook()
    sheet = excel.active
    title = f'Medic-{label}'
    sheet.title = title
    sheet.append(['Title', 'Link', 'Price', 'Uses', 'Description', 'Alcohol', 'Pregnancy', 'Breast feeding', 'Driving', 'Kidney', 'Liver'])
    start = 0

    for page in tqdm(range(1, pages + 1)):
        label = str(label)
        page = str(page)
        obj = Medic()
        await obj.get_med_link(label, page)

    excel.save(f'/content/drive/MyDrive/MEDIC/medic-{label}.xlsx')

    end = datetime.now() - start
    print('|||||||||||||||||||||||||||||||||||||||||||||||||||')
    print(f'||||||||||||||  {title} is Generated |||||||||||')
    print('||||||||||||||  {}      |||||||||||||||'.format(end))
    print('|||||||||||||||||||||||||||||||||||||||||||||||||||')


# Clipped Information

In [None]:
import requests
from bs4 import *
import openpyxl
from datetime import datetime
from tqdm import tqdm

class Medic():
    """Scraper for 1mg.com medicine listing and detail pages.

    Pages are downloaded with ``requests`` and parsed with ``BeautifulSoup``.
    Each scraped medicine becomes one row appended to the module-level
    ``sheet`` worksheet, which therefore has to exist before scraping starts.
    """

    def __init__(self, headers=None):
        """Store the site root and the request headers.

        Args:
            headers: Optional header mapping sent with every request. When
                omitted, a fresh dict holding a desktop User-Agent is used, so
                instances never share (and cannot corrupt) one default dict.
        """
        if headers is None:
            headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}
        self.url = "https://www.1mg.com"
        self.headers = headers
        # Per-field lists; no method of this class reads or fills them.
        self.title = []
        self.desc = []
        self.price = []
        self.link = []
        self.uses = []
        # (med_link, soup) of the most recently downloaded detail page.
        self._last_page = None

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed with the built-in HTML parser."""
        # The timeout keeps one stalled connection from hanging the whole crawl.
        req = requests.get(url=url, headers=self.headers, timeout=30)
        return BeautifulSoup(req.content, 'html.parser')

    def _page_soup(self, med_link):
        """Return the parsed detail page for ``med_link``.

        The last downloaded page is remembered, so the four field getters
        called back-to-back for one medicine trigger a single download.
        """
        cached = getattr(self, '_last_page', None)
        if cached is not None and cached[0] == med_link:
            return cached[1]
        soup = self._fetch_soup(f"{self.url}{med_link}")
        self._last_page = (med_link, soup)
        return soup

    @staticmethod
    def _first_text(container, name, attrs):
        """Stripped text of the first ``name`` tag matching ``attrs`` below ``container``, or ''."""
        if container is None:
            return ''
        tag = container.find(name, attrs=attrs)
        return tag.get_text(strip=True) if tag is not None else ''

    def get_med_link(self, lab, n):
        """Collect the medicine links on one listing page and scrape each of them.

        Args:
            lab: Value of the ``label`` query parameter.
            n: Value of the ``page`` query parameter.

        Side effects:
            Sets the module-level ``start`` to the current time once the links
            are collected, then calls ``get_all_rows`` with them. A page with
            no product grid simply yields no links.
        """
        url = f"{self.url}/drugs-all-medicines?page={n}&label={lab}"
        soup = self._fetch_soup(url)
        data = []
        grid = soup.find('div', attrs={'class': 'style__product-grid___3noQW'})

        if grid is not None:
            # Direct child tags only: text nodes have no tag-searching `find`.
            for card in grid.find_all(True, recursive=False):
                anchor = card.find('a', attrs={'class': 'style__flex-1___A_qoj'})
                if anchor is not None and anchor.get('href'):
                    data.append(anchor['href'])

        global start
        start = datetime.now()
        self.get_all_rows(data)

    def get_all_rows(self, data):
        """Scrape every link in ``data`` and append one worksheet row per medicine.

        Any exception raised while handling a link is printed, followed by a
        "NOT FOUND" line naming the link; the loop then moves on.
        """
        for link in tqdm(data):
            try:
                title, desc = self.get_med_title_desc(med_link=link)
                effects = self.get_med_effects(med_link=link)
                price = self.get_med_price(med_link=link)
                alcohol, pregnancy, breast_feeding, driving, kidney, liver = self.get_med_advises(med_link=link)
                med_link = "https://www.1mg.com{}".format(link)
                self.set_excel(title, desc, effects, price, med_link, alcohol, pregnancy, breast_feeding, driving, kidney, liver)
            except Exception as e:
                print(e)
                print("NOT FOUND  : " + link)

    def get_med_title_desc(self, med_link="/drugs/augmentin-625-duo-tablet-138629"):
        """Return ``(title, description)`` found inside the ``drug-main-header`` pane.

        Either value is ``''`` when its element (or the pane itself) is absent.
        """
        header = self._page_soup(med_link).find('div', attrs={'id': 'drug-main-header'})
        title = self._first_text(header, 'h1', {'class': 'DrugHeader__title-content___2ZaPo'})
        desc = self._first_text(header, 'div', {'id': 'overview'})
        return title, desc

    def get_med_effects(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the text of the uses list inside ``uses_and_benefits``, or ``''``."""
        pane = self._page_soup(med_link).find('div', attrs={'id': 'uses_and_benefits'})
        return self._first_text(pane, 'ul', {'class': 'DrugOverview__list___1HjxR DrugOverview__uses___1jmC3'})

    def get_med_price(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the struck-through price text of a medicine page, or ``''``.

        The whole price pane is searched, so a price located in any child is
        found. The plan-option price is preferred; the slashed price is the
        fallback.
        """
        pane = self._page_soup(med_link).find('div', attrs={'class': 'DrugPage__atc-pane___1cn5F'})
        plan_price = self._first_text(pane, 'span', {'class': 'PriceBoxPlanOption__stike___pDQVN'})
        if plan_price:
            return plan_price
        return self._first_text(pane, 'span', {'class': 'DrugPriceBox__slashed-price___2UGqd'})

    def get_med_advises(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the first six safety-advice tags of a medicine page.

        Returns:
            ``(alcohol, pregnancy, breast_feeding, driving, kidney, liver)``,
            taken positionally from the warning blocks in page order. A block
            without a tag contributes ``''`` so later positions do not shift.

        Raises:
            IndexError: When fewer than six warning blocks are present; the
                values cannot be mapped to the six columns reliably then.
        """
        pane = self._page_soup(med_link).find('div', attrs={'id': 'safety_advice'})
        blocks = []
        if pane is not None:
            blocks = pane.find_all('div', attrs={'class': 'DrugOverview__warning-top___UD3xX'})
        advices = [
            self._first_text(block, 'div', {'class': 'DrugOverview__warning-tag___aHZlc'})
            for block in blocks
        ]
        if len(advices) < 6:
            raise IndexError(f"expected 6 safety advice entries, found {len(advices)}")

        alcohol, pregnancy, breast_feeding, driving, kidney, liver = advices[:6]
        return alcohol, pregnancy, breast_feeding, driving, kidney, liver

    def set_excel(self, title, desc, effects, price, link, alcohol, pregnancy, breast_feeding, driving, kidney, liver):
        """Append one medicine as a row to the module-level ``sheet`` worksheet.

        The column order is title, link, price, uses, description, followed by
        the six safety-advice values.
        """
        sheet.append([title, link, price, effects, desc, alcohol, pregnancy, breast_feeding, driving, kidney, liver])


import os

from google.colab import drive
drive.mount('/content/drive')

# Per-letter count (stored as a string); it is divided by `elementperpage`
# below to get the number of listing pages crawled for that letter.
label_dict = {'a': '10000',
              'b': '10000',
              'c': '10000'}

elementperpage = 30

# Create the target folder up front so a long crawl cannot fail at save time
# just because the folder is missing.
output_dir = '/content/drive/MyDrive/MEDIC'
os.makedirs(output_dir, exist_ok=True)

for label in tqdm(label_dict):
  pages = int(label_dict[label]) // elementperpage
  excel = openpyxl.Workbook()
  # `sheet` must stay a module-level name: Medic.set_excel appends to it.
  sheet = excel.active
  title = f'Medic-{label}'
  sheet.title = title
  sheet.append(['Title','Link','Price','Uses','Description','Alcohol','Pregnancy','Breast feeding','Driving','Kidney','Liver'])

  # Time the whole label with a local timer. Medic.get_med_link resets the
  # global `start` on every page, so relying on it would report only the last
  # page (and fail outright when a label has zero pages).
  label_started = datetime.now()
  obj = Medic()

  try:
    for page in tqdm(range(1, pages + 1)):
      obj.get_med_link(str(label), str(page))
  finally:
    # Keep whatever rows were collected even if the crawl is interrupted.
    excel.save(f'{output_dir}/medic-{label}.xlsx')

  end = datetime.now() - label_started
  print('|||||||||||||||||||||||||||||||||||||||||||||||||||')
  print(f'||||||||||||||  {title} is Generated |||||||||||')
  print('||||||||||||||  {}      |||||||||||||||'.format(end))
  print('|||||||||||||||||||||||||||||||||||||||||||||||||||')


In [None]:
import requests
from bs4 import *
import openpyxl
from datetime import datetime
from tqdm import tqdm

In [None]:
class Medic():
    """Scraper for 1mg.com medicine listing and detail pages.

    Pages are downloaded with ``requests`` and parsed with ``BeautifulSoup``.
    Each scraped medicine becomes one row appended to the module-level
    ``sheet`` worksheet, which therefore has to exist before scraping starts.
    """

    def __init__(self, headers=None):
        """Store the site root and the request headers.

        Args:
            headers: Optional header mapping sent with every request. When
                omitted, a fresh dict holding a desktop User-Agent is used, so
                instances never share (and cannot corrupt) one default dict.
        """
        if headers is None:
            headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}
        self.url = "https://www.1mg.com"
        self.headers = headers
        # Per-field lists; no method of this class reads or fills them.
        self.title = []
        self.desc = []
        self.price = []
        self.link = []
        self.uses = []
        # (med_link, soup) of the most recently downloaded detail page.
        self._last_page = None

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed with the built-in HTML parser."""
        # The timeout keeps one stalled connection from hanging the whole crawl.
        req = requests.get(url=url, headers=self.headers, timeout=30)
        return BeautifulSoup(req.content, 'html.parser')

    def _page_soup(self, med_link):
        """Return the parsed detail page for ``med_link``.

        The last downloaded page is remembered, so the four field getters
        called back-to-back for one medicine trigger a single download.
        """
        cached = getattr(self, '_last_page', None)
        if cached is not None and cached[0] == med_link:
            return cached[1]
        soup = self._fetch_soup(f"{self.url}{med_link}")
        self._last_page = (med_link, soup)
        return soup

    @staticmethod
    def _first_text(container, name, attrs):
        """Stripped text of the first ``name`` tag matching ``attrs`` below ``container``, or ''."""
        if container is None:
            return ''
        tag = container.find(name, attrs=attrs)
        return tag.get_text(strip=True) if tag is not None else ''

    def get_med_link(self, lab, n):
        """Collect the medicine links on one listing page and scrape each of them.

        Args:
            lab: Value of the ``label`` query parameter.
            n: Value of the ``page`` query parameter.

        Side effects:
            Sets the module-level ``start`` to the current time once the links
            are collected, then calls ``get_all_rows`` with them. A page with
            no product grid simply yields no links.
        """
        url = f"{self.url}/drugs-all-medicines?page={n}&label={lab}"
        soup = self._fetch_soup(url)
        data = []
        grid = soup.find('div', attrs={'class': 'style__product-grid___3noQW'})

        if grid is not None:
            # Direct child tags only: text nodes have no tag-searching `find`.
            for card in grid.find_all(True, recursive=False):
                anchor = card.find('a', attrs={'class': 'style__flex-1___A_qoj'})
                if anchor is not None and anchor.get('href'):
                    data.append(anchor['href'])

        global start
        start = datetime.now()
        self.get_all_rows(data)

    def get_all_rows(self, data):
        """Scrape every link in ``data`` and append one worksheet row per medicine.

        Any exception raised while handling a link is printed, followed by a
        "NOT FOUND" line naming the link; the loop then moves on.
        """
        for link in tqdm(data):
            try:
                title, desc = self.get_med_title_desc(med_link=link)
                effects = self.get_med_effects(med_link=link)
                price = self.get_med_price(med_link=link)
                alcohol, pregnancy, breast_feeding, driving, kidney, liver = self.get_med_advises(med_link=link)
                med_link = "https://www.1mg.com{}".format(link)
                self.set_excel(title, desc, effects, price, med_link, alcohol, pregnancy, breast_feeding, driving, kidney, liver)
            except Exception as e:
                print(e)
                print("NOT FOUND  : " + link)

    def get_med_title_desc(self, med_link="/drugs/augmentin-625-duo-tablet-138629"):
        """Return ``(title, description)`` found inside the ``drug-main-header`` pane.

        Either value is ``''`` when its element (or the pane itself) is absent.
        """
        header = self._page_soup(med_link).find('div', attrs={'id': 'drug-main-header'})
        title = self._first_text(header, 'h1', {'class': 'DrugHeader__title-content___2ZaPo'})
        desc = self._first_text(header, 'div', {'id': 'overview'})
        return title, desc

    def get_med_effects(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the text of the uses list inside ``uses_and_benefits``, or ``''``."""
        pane = self._page_soup(med_link).find('div', attrs={'id': 'uses_and_benefits'})
        return self._first_text(pane, 'ul', {'class': 'DrugOverview__list___1HjxR DrugOverview__uses___1jmC3'})

    def get_med_price(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the struck-through price text of a medicine page, or ``''``.

        The whole price pane is searched, so a price located in any child is
        found. The plan-option price is preferred; the slashed price is the
        fallback.
        """
        pane = self._page_soup(med_link).find('div', attrs={'class': 'DrugPage__atc-pane___1cn5F'})
        plan_price = self._first_text(pane, 'span', {'class': 'PriceBoxPlanOption__stike___pDQVN'})
        if plan_price:
            return plan_price
        return self._first_text(pane, 'span', {'class': 'DrugPriceBox__slashed-price___2UGqd'})

    def get_med_advises(self, med_link='/drugs/augmentin-625-duo-tablet-138629'):
        """Return the first six safety-advice tags of a medicine page.

        Returns:
            ``(alcohol, pregnancy, breast_feeding, driving, kidney, liver)``,
            taken positionally from the warning blocks in page order. A block
            without a tag contributes ``''`` so later positions do not shift.

        Raises:
            IndexError: When fewer than six warning blocks are present; the
                values cannot be mapped to the six columns reliably then.
        """
        pane = self._page_soup(med_link).find('div', attrs={'id': 'safety_advice'})
        blocks = []
        if pane is not None:
            blocks = pane.find_all('div', attrs={'class': 'DrugOverview__warning-top___UD3xX'})
        advices = [
            self._first_text(block, 'div', {'class': 'DrugOverview__warning-tag___aHZlc'})
            for block in blocks
        ]
        if len(advices) < 6:
            raise IndexError(f"expected 6 safety advice entries, found {len(advices)}")

        alcohol, pregnancy, breast_feeding, driving, kidney, liver = advices[:6]
        return alcohol, pregnancy, breast_feeding, driving, kidney, liver

    def set_excel(self, title, desc, effects, price, link, alcohol, pregnancy, breast_feeding, driving, kidney, liver):
        """Append one medicine as a row to the module-level ``sheet`` worksheet.

        The column order is title, link, price, uses, description, followed by
        the six safety-advice values.
        """
        sheet.append([title, link, price, effects, desc, alcohol, pregnancy, breast_feeding, driving, kidney, liver])



In [None]:
# Mount Google Drive at /content/drive; the crawl loop later saves each
# workbook under /content/drive/MyDrive/MEDIC/.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Letters to crawl, each mapped to a count stored as a string. The crawl loop
# converts the count with int() and divides it by `elementperpage` to get the
# number of listing pages for that letter.
# Presumably the count approximates how many medicines are listed under the
# letter — TODO confirm.
# Only 'a'-'c' are active. The 'c' entry has no trailing comma, so enabling
# 'd' (or any later letter) also requires adding one.
label_dict = {'a': '10000',
              'b': '10000',
              'c': '10000'
              # Disabled entries (inside the literal):
              # 'd': '10000',
              # 'e': '10000',
              # 'f': '10000',
              # 'g': '10000',
              # 'h': '4500',
              # 'i': '6800'
              }
              # Disabled entries (outside the literal; move them above the
              # closing brace to enable them):
              # 'j': '2000',
              # 'k': '6000',
              # 'l': '10000',
              # 'm': '10000',
              # 'n': '10000',
              # 'o': '10000',
              # 'p': '10000',
              # 'q': '1000',
              # 'r': '10000',
              # 's': '10000',
              # 't': '10000',
              # 'u': '3000',
              # 'v': '10000',
              # 'w': '2800',
              # 'x': '2000',
              # 'y': '900',
              # 'z': '9000'


In [None]:
# Sanity check: print the configured count of every active letter.
for label, count in label_dict.items():
  print(count)

In [None]:
import os

elementperpage = 30

# Create the target folder up front so a long crawl cannot fail at save time
# just because the folder is missing.
output_dir = '/content/drive/MyDrive/MEDIC'
os.makedirs(output_dir, exist_ok=True)

for label in tqdm(label_dict):
  pages = int(label_dict[label]) // elementperpage
  excel = openpyxl.Workbook()
  # `sheet` must stay a module-level name: Medic.set_excel appends to it.
  sheet = excel.active
  title = f'Medic-{label}'
  sheet.title = title
  sheet.append(['Title','Link','Price','Uses','Description','Alcohol','Pregnancy','Breast feeding','Driving','Kidney','Liver'])

  # Time the whole label with a local timer. Medic.get_med_link resets the
  # global `start` on every page, so relying on it would report only the last
  # page (and fail outright when a label has zero pages).
  label_started = datetime.now()
  obj = Medic()

  try:
    for page in tqdm(range(1, pages + 1)):
      obj.get_med_link(str(label), str(page))
  finally:
    # Keep whatever rows were collected even if the crawl is interrupted.
    excel.save(f'{output_dir}/medic-{label}.xlsx')

  end = datetime.now() - label_started
  print('|||||||||||||||||||||||||||||||||||||||||||||||||||')
  print(f'||||||||||||||  {title} is Generated |||||||||||')
  print('||||||||||||||  {}      |||||||||||||||'.format(end))
  print('|||||||||||||||||||||||||||||||||||||||||||||||||||')