# 5 Earbuds Reviews from Amazon.com

In [None]:
! pip install selenium webdriver_manager==4.0.2

In [28]:
import pandas as pd
from time import sleep
import os
import logging
from datetime import datetime
import random

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service

In [54]:
# Logging configuration for webdriver_manager (its logger is named 'WDM').
# NOTE(review): NOTSET on a non-root logger defers to the parent logger's
# level rather than silencing the logger -- confirm this has the intended effect.
logging.getLogger('WDM').setLevel(logging.NOTSET)
# NOTE(review): presumably read by webdriver_manager as its log switch; verify
# that 'False' is a value it actually accepts.
os.environ['WDM_LOG'] = 'False'

# Driver set-up
# install() returns a path string; any "THIRD_PARTY_NOTICES." substring is
# removed from it before it is handed to Service.
# NOTE(review): presumably a workaround for install() pointing at the notices
# file instead of the chromedriver binary -- verify against the pinned version.
service = Service(ChromeDriverManager().install().replace("THIRD_PARTY_NOTICES.", ''))
# Module-level browser session shared by get_title() and access_page().
driver = webdriver.Chrome(service=service)

In [4]:
def _element_value(box, xpath, attribute=None):
    """Return the stripped text (or an attribute) of the first XPath match.

    Args:
        box: Element exposing ``find_element`` (a review container).
        xpath: XPath expression evaluated relative to ``box``.
        attribute: When given, read this attribute instead of the text.

    Returns:
        The stripped string, or ``None`` if the lookup failed for any reason.
    """
    try:
        element = box.find_element(By.XPATH, xpath)
        value = element.text if attribute is None else element.get_attribute(attribute)
        return value.strip()
    except Exception:
        # Best-effort scraping: one missing or unreadable node must not abort
        # the whole page, so the caller substitutes a placeholder instead.
        return None


def _parse_star_rating(class_attr):
    """Return the digit that follows the last 'a-star-' marker, else 'N/A'."""
    if not class_attr:
        return 'N/A'
    _, marker, tail = class_attr.rpartition('a-star-')
    # Without the marker the old split()-based code returned the first
    # character of the whole class string; report 'N/A' instead.
    if marker and tail[:1].isdigit():
        return tail[0]
    return 'N/A'


def _parse_review_date(raw_text):
    """Convert '... on <Month> <day>, <year>' to dd/mm/yyyy, else 'N/A'."""
    if raw_text is None:
        return 'N/A'
    date_part = raw_text.split(' on ')[-1]
    try:
        return datetime.strptime(date_part, '%B %d, %Y').strftime("%d/%m/%Y")
    except ValueError:
        return 'N/A'


def get_boxes_data(boxes, prod_title):
    """Extract one record per review box.

    Args:
        boxes: Iterable of review container elements (each must expose
            ``find_element``).
        prod_title: Product name stored on every record; a falsy value is
            recorded as 'N/A'.

    Returns:
        A list of dicts with the keys 'Product Name', 'Name', 'Stars',
        'Title', 'Date' and 'Description'. Any field that cannot be read or
        parsed is set to 'N/A'.
    """
    # The product name is the same for every box, so resolve it once.
    prod_name = prod_title if prod_title else 'N/A'

    def or_na(value):
        return 'N/A' if value is None else value

    data_dicts = []
    for box in boxes:
        name = _element_value(box, './/*[@class="a-profile-name"]')
        star_class = _element_value(
            box, './/*[@data-hook="review-star-rating"]', attribute='class'
        )
        title = _element_value(box, './/*[@data-hook="review-title"]')
        date_text = _element_value(box, './/*[@data-hook="review-date"]')
        description = _element_value(box, './/*[@data-hook="review-body"]')

        data_dicts.append({
            'Product Name': prod_name,
            'Name': or_na(name),
            'Stars': _parse_star_rating(star_class),
            'Title': or_na(title),
            'Date': _parse_review_date(date_text),
            'Description': or_na(description),
        })

    return data_dicts

In [92]:
def get_title(url):
    """Load ``url`` in the shared driver and return the product title text.

    The title is read from the ``<span id="productTitle">`` element and
    returned with surrounding whitespace removed.
    """
    driver.get(url)
    return driver.find_element(By.XPATH, '//span[@id="productTitle"]').text.strip()

In [5]:
def access_page(url, len_page, offset, prod_title):
    """Scrape review pages for every star rating and return them as a frame.

    For each rating from five stars down to one, pages ``offset + 1`` through
    ``offset + len_page`` are requested with the module-level ``driver``.
    Paging for a rating stops at the first page that has no review boxes or
    that fails; scraping then continues with the next rating.

    Args:
        url: Base reviews URL; the query string is appended to it.
        len_page: Maximum number of pages to fetch per star rating.
        offset: Number of leading pages to skip.
        prod_title: Product name stored on every scraped record.

    Returns:
        A ``pandas.DataFrame`` with one row per review (empty if nothing was
        collected).
    """
    reviews = []
    for rating in ('five', 'four', 'three', 'two', 'one'):
        for page_no in range(offset + 1, offset + len_page + 1):
            try:
                page_url = url + f"?reviewerType=all_reviews&filterByStar={rating}_star&pageNumber={page_no}"
                driver.get(page_url)
                # Short random pause so requests are not fired back-to-back.
                sleep(0.5 * random.random())
                boxes = driver.find_elements(By.XPATH, '//div[@data-hook="review"]')
                if not boxes:
                    break
                reviews.extend(get_boxes_data(boxes, prod_title))
            except Exception:
                # Stay best-effort, but leave a trace instead of failing
                # silently; KeyboardInterrupt/SystemExit now propagate.
                logging.getLogger(__name__).warning(
                    "Stopped scraping %s-star reviews of %r at page %s",
                    rating, url, page_no, exc_info=True,
                )
                break
    return pd.DataFrame(reviews)

In [1]:
# Products to scrape: short label -> (full product title, reviews page URL).
URLS = {
    "Beats Flex": (
        "Beats Flex Wireless Earbuds - Apple W1 Headphone Chip, Magnetic Earphones, Class 1 Bluetooth, 12 Hours of Listening Time, Built-in Microphone - Flame Blue",
        "https://www.amazon.com/Beats-Flex-Wireless-Earbuds-Built/product-reviews/B08QBCXZXP/",
    ),
    "Sony WI-C100": (
        "Sony WI-C100 Wireless in-ear Bluetooth Headphones with built-in microphone, Black",
        "https://www.amazon.com/Sony-Wireless-Bluetooth-Headphones-Microphone/product-reviews/B09YLFHFDW/",
    ),
    "Skullcandy Jib+": (
        "Skullcandy Jib+ In-Ear Wireless Earbuds, 6 Hr Battery, Microphone, Works with iPhone Android and Bluetooth Devices - Black",
        "https://www.amazon.com/Skullcandy-Plus-Wireless-Ear-Earbud/product-reviews/B07RS25MCP/",
    ),
    "JLab JBuds Pro": (
        "JLab JBuds Pro Bluetooth Wireless Signature Earbuds, Black, Titanium 10mm Drivers, 10-Hour Battery Life, Music Controls, Noise Isolation, Bluetooth 4.1 Extra Gel Tips and Cush Fins",
        "https://www.amazon.com/Bluetooth-Wireless-Signature-Titanium-Isolation/product-reviews/B072BM7VQX/",
    ),
    "Elgin Ruckus": (
        "Elgin Ruckus Discord Bluetooth Earplug Earbuds, OSHA Compliant Noise-Isolating Safety Wireless Headphones, All-Weather Earphones for Industrial Work, and High-Noise Environments",
        "https://www.amazon.com/Elgin-Noise-Isolating-Headphones-All-Weather-Environments/product-reviews/B07N98QPKD/",
    ),
}

In [2]:
def get_one_product_reviews(name, url, len_page, offset):
    """Scrape one product's reviews, save them to CSV and return the frame.

    The CSV file name is the lower-cased ``name`` with whitespace runs
    replaced by underscores, followed by
    ``_amazon_product_reviews_extended.csv``. The product name and the
    frame's shape are printed, followed by a blank line.
    """
    reviews = access_page(url, len_page, offset, name)
    slug = '_'.join(name.lower().split())
    reviews.to_csv(f"{slug}_amazon_product_reviews_extended.csv", index=False)
    print(name, reviews.shape, sep="\n")
    print()
    return reviews


def get_all_reviews(urls, len_page=10, offset=0):
    """Scrape every product in ``urls`` and return all reviews in one frame.

    Args:
        urls: Mapping whose values are ``(product title, reviews URL)``
            sequences; the keys are not used.
        len_page: Maximum number of pages to fetch per star rating
            (defaults to the previously hard-coded 10).
        offset: Number of leading pages to skip (defaults to 0).

    Returns:
        A ``pandas.DataFrame`` with the reviews of all products and a fresh
        0..n-1 index; an empty frame when ``urls`` is empty.
    """
    frames = [
        get_one_product_reviews(
            name=entry[0], url=entry[1], len_page=len_page, offset=offset
        )
        for entry in urls.values()
    ]
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame per product.
    return pd.concat(frames, ignore_index=True)

In [None]:
# Trial run on the first URLS entry only: `name` is the short dictionary key
# and `urls` is its (product title, reviews URL) tuple. One page per star
# rating is requested, and the short key is what gets passed as the name.
name, urls = tuple(URLS.items())[0]
get_one_product_reviews(name=name, url=urls[1], len_page=1, offset=0)

In [None]:
# Scrape every product listed in URLS into one combined frame, then display a
# randomly sampled row as a quick spot check.
all_reviews = get_all_reviews(URLS)
all_reviews.sample()

Beats Flex Wireless Earbuds - Apple W1 Headphone Chip, Magnetic Earphones, Class 1 Bluetooth, 12 Hours of Listening Time, Built-in Microphone - Flame Blue
(500, 6)

Sony WI-C100 Wireless in-ear Bluetooth Headphones with built-in microphone, Black
(500, 6)

Skullcandy Jib+ In-Ear Wireless Earbuds, 6 Hr Battery, Microphone, Works with iPhone Android and Bluetooth Devices - Black
(500, 6)

JLab JBuds Pro Bluetooth Wireless Signature Earbuds, Black, Titanium 10mm Drivers, 10-Hour Battery Life, Music Controls, Noise Isolation, Bluetooth 4.1 Extra Gel Tips and Cush Fins
(500, 6)

Elgin Ruckus Discord Bluetooth Earplug Earbuds, OSHA Compliant Noise-Isolating Safety Wireless Headphones, All-Weather Earphones for Industrial Work, and High-Noise Environments
(500, 6)



Unnamed: 0,Product Name,Name,Stars,Title,Date,Description
679,Sony WI-C100 Wireless in-ear Bluetooth Headpho...,Mariana,,"Bien pero no excelente, no me funciona bien el...",20/11/2023,"Son un poco incómodos para mi gusto, el micróf..."


In [None]:
# Collapse line breaks inside review bodies, then export the combined frame
# using '*' as the field separator.
all_reviews['Description'] = all_reviews['Description'].map(
    lambda body: body.replace("\n", " ")
)
all_reviews.to_csv("all_reviews_amazon_extended.csv", sep='*')

In [None]:
# Display the combined reviews frame as the cell output.
all_reviews

Unnamed: 0,Product Name,Name,Stars,Title,Date,Description
0,Beats Flex Wireless Earbuds - Apple W1 Headpho...,Amazon Customer,,Sound Great,15/06/2024,These may not be AirPod Pros but they sound ju...
1,Beats Flex Wireless Earbuds - Apple W1 Headpho...,ABR,,Good product,28/06/2024,My first pair of Beats and first pair of earbu...
2,Beats Flex Wireless Earbuds - Apple W1 Headpho...,Jack S,,My Favorite Daily Use Headphones,29/12/2021,"When I'm on the go, at work, or just casually ..."
3,Beats Flex Wireless Earbuds - Apple W1 Headpho...,George wachsmuth,,Great low budget bluetooth earphones,04/05/2024,So….Conclusion is…I really like these earphone...
4,Beats Flex Wireless Earbuds - Apple W1 Headpho...,efowble,,A Super Good Buy For The Price,29/06/2024,Right off the bat…you are gonna get a cool lit...
...,...,...,...,...,...,...
495,Elgin Ruckus Discord Bluetooth Earplug Earbuds...,josh allums,,Almost perfect,09/06/2021,Really pleased with these earbuds. I had the i...
496,Elgin Ruckus Discord Bluetooth Earplug Earbuds...,Jose,,Definitely worth the money,14/05/2021,"I love these so much, I had to buy them again ..."
497,Elgin Ruckus Discord Bluetooth Earplug Earbuds...,Samuel Levine,,"Great Buds, Not THAT tough",23/11/2021,Best buds I’ve found for working (landscaping/...
498,Elgin Ruckus Discord Bluetooth Earplug Earbuds...,Amazon Customer,,Took some getting used to,14/01/2020,I've never used in the ear protection like the...
