## Importing Libraries and Mounting Drive

In [None]:
import pandas as pd
import requests
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from bs4 import BeautifulSoup
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only; may prompt for authorization).
drive.mount('/content/drive')

Mounted at /content/drive


## Identifying the Number of Review Pages

In [None]:
base_url = 'https://reviews.webmd.com/drugs/drugreview-95358-yaz-28-oral'

# Fetch the first review page. The timeout prevents an indefinite hang, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page as
# if it were review content.
response = requests.get(base_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Default to a single page so `num_pages` is always defined for later cells,
# even when the pagination element is missing or has an unexpected format.
num_pages = 1

# Find the element containing information about the total number of pages
pagination_div = soup.find('div', class_='pagination-holder')
pagination_tokens = pagination_div.text.split() if pagination_div is not None else []

try:
    # The page count is taken from the last whitespace-separated token of the
    # pagination text (assumes the markup ends with the last page number).
    num_pages = int(pagination_tokens[-1])
except (IndexError, ValueError):
    print("Pagination information not found. Unable to determine the number of pages.")
    print("Falling back to num_pages =", num_pages)
else:
    print("Total number of pages:", num_pages)

Total number of pages: 38


## Scraping Data into a DataFrame

In [None]:
def _overall_rating(details_div):
    """Return the last whitespace-separated token of the 'overall-rating' div, or None if absent."""
    rating_div = details_div.find('div', class_='overall-rating')
    if rating_div is None:
        return None
    tokens = rating_div.text.strip().split()
    return tokens[-1] if tokens else None


def _category_rating(details_div, label):
    """Return the 'aria-valuenow' of the rating widget that follows the <strong> tag with text `label`.

    Returns None when either the label or the widget (or its attribute) is missing.
    """
    label_tag = details_div.find('strong', string=label)
    if label_tag is None:
        return None
    widget = label_tag.find_next_sibling("div", class_="webmd-rate on-mobile")
    return widget.get('aria-valuenow') if widget is not None else None


def _condition(details_div):
    """Return the text after the first ':' in the 'condition' <strong> tag, or None if unavailable."""
    condition_tag = details_div.find('strong', class_='condition')
    if condition_tag is None:
        return None
    _, separator, value = condition_tag.text.partition(':')
    return value.strip() if separator else None


# Parallel lists: exactly one entry is appended to each list per recorded
# review, so they always have equal length and rows stay aligned.
text = []
overall_ratings_set = []
effectiveness_ratings_set = []
ease_of_use_ratings_set = []
satisfaction_ratings_set = []
conditions_set = []

for page_num in range(1, num_pages + 1):
    url = f'{base_url}?page={page_num}'

    # Bound the wait and surface HTTP errors instead of silently collecting
    # zero reviews from an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    for details_div in soup.find_all('div', class_='review-details'):
        # Only reviews that carry a description are recorded.
        review_description = details_div.find('div', class_="description")
        if review_description is None:
            continue
        description_text = review_description.find('p', class_='description-text')
        if description_text is None:
            continue

        text.append(description_text.get_text())
        overall_ratings_set.append(_overall_rating(details_div))
        effectiveness_ratings_set.append(_category_rating(details_div, 'Effectiveness'))
        ease_of_use_ratings_set.append(_category_rating(details_div, 'Ease of Use'))
        satisfaction_ratings_set.append(_category_rating(details_div, 'Satisfaction'))
        conditions_set.append(_condition(details_div))

# Build the frame once, after all pages are scraped. Rating values are kept
# as scraped (strings, or None when a field was missing).
df = pd.DataFrame({
    "Review": text,
    "Condition": conditions_set,
    "Overall Ratings": overall_ratings_set,
    "Effectiveness": effectiveness_ratings_set,
    "Ease of Use": ease_of_use_ratings_set,
    "Satisfaction": satisfaction_ratings_set,
})

In [None]:
# Preview the first five scraped reviews.
df.head(n=5)

Unnamed: 0,Review,Condition,Overall Ratings,Effectiveness,Ease of Use,Satisfaction
0,I was taking for PMDD or PME like symptoms. I'...,Premenstrual Disorder with a State of Unhappiness,1.0,1,1,1
1,I had a very bad reaction to Yaz. Within 2 wee...,Premenstrual Disorder with a State of Unhappiness,2.3,1,5,1
2,After dealing with hormonal acne for about 2 y...,Acne,5.0,5,5,5
3,"After taking 7 pills, here are what I experien...",Birth Control,1.7,3,1,1
4,I was on yaz for a little over two years. Perf...,Birth Control,1.7,2,2,1


## Downloading Scraped Data

In [None]:
# Write the scraped reviews to an Excel workbook on the mounted Drive
# (without the index column), then read the same file back and preview it.
excel_file_path = "/content/drive/My Drive/YAZreviews.xlsx"
df.to_excel(excel_file_path, index=False)

df1 = pd.read_excel(excel_file_path)
df1.head(3)

Unnamed: 0,Review,Condition,Overall Ratings,Effectiveness,Ease of Use,Satisfaction
0,I was taking for PMDD or PME like symptoms. I'...,Premenstrual Disorder with a State of Unhappiness,1.0,1,1,1
1,I had a very bad reaction to Yaz. Within 2 wee...,Premenstrual Disorder with a State of Unhappiness,2.3,1,5,1
2,After dealing with hormonal acne for about 2 y...,Acne,5.0,5,5,5
