In [54]:
# Install the required packages and import the scraping dependencies

!pip install selenium
!pip install webdriver-manager
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Set up the Selenium WebDriver
options = Options()
# NOTE: the `Options.headless` attribute was deprecated in Selenium 4.8 and
# removed in later 4.x releases, where assigning to it has no effect. Headless
# mode is therefore toggled through the Chrome command-line flag instead.
HEADLESS = False  # Set to True if you want the browser to run in the background
if HEADLESS:
    options.add_argument("--headless=new")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Open the page with the perfume data from Ulta
url = 'https://www.ulta.com/shop/fragrance/womens-fragrance/perfume'  # Adjust the URL as needed

try:
    driver.get(url)

    # Wait (up to 30 s) until at least one product card is present in the DOM
    WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.ProductListingResults__productCard"))
    )
except Exception:
    # Do not leave an orphaned browser process behind if the page never loads;
    # the original error is re-raised unchanged.
    driver.quit()
    raise
print("Page loaded successfully.")

# Create a set to track products that have already been scraped
scraped_products = set()

# Create a list to store the product data as [name, brand, reviews] rows
product_data = []

# Function to scrape products from the current page
def scrape_products():
    """Scrape every product card currently present on the listing page.

    Reads the module-level ``driver`` and, for each card matching
    ``li.ProductListingResults__productCard``, extracts the product name,
    brand and review summary. New products are appended to the module-level
    ``product_data`` list as ``[product_name, brand_name, reviews]`` and
    remembered in the module-level ``scraped_products`` set so that repeated
    calls (after more cards are loaded) do not record them twice.

    A card is only marked as scraped once all of its fields were read, so a
    card that fails part-way is retried on the next call. Cards without a
    review element are recorded with an empty review string.

    Returns:
        None. Results are accumulated in ``product_data``; progress and
        errors are reported with ``print``.
    """
    try:
        product_containers = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.ProductListingResults__productCard"))
        )
    except Exception as e:
        print("Error with product containers:", e)
        return

    # This is the number of cards in the DOM, not the number of new products.
    print(f"Number of product cards found: {len(product_containers)}")

    new_count = 0
    for container in product_containers:
        try:
            product_name = container.find_element(By.CSS_SELECTOR, ".ProductCard__product").text
            brand_name = container.find_element(By.CSS_SELECTOR, ".ProductCard__brand").text

            if not product_name:
                # Nothing usable to record; leave the card unmarked so a later
                # pass can pick it up if its text becomes available.
                print("Skipping product card with no visible name.")
                continue

            # Key on brand + name: a name alone is not guaranteed to be unique
            # across brands, and would silently drop such products.
            product_key = (brand_name, product_name)
            if product_key in scraped_products:
                print(f"Skipping duplicate product: {product_name}")
                continue  # Skip this product and move to the next one

            # find_elements returns an empty list instead of raising, so a
            # card without a review element is still recorded.
            review_elements = container.find_elements(By.CSS_SELECTOR, ".sr-only")
            reviews = review_elements[0].text if review_elements else ""
        except Exception as e:
            # Best-effort scraping: report the failing card and carry on.
            print("Error scraping product:", e)
            continue

        # Mark as scraped only after every field was extracted successfully.
        scraped_products.add(product_key)
        product_data.append([product_name, brand_name, reviews])
        new_count += 1

        print(f"Scraped: {product_name}")

    print(f"New products scraped in this pass: {new_count}")

# Main loop to manually click 'Load More' and scrape after each click
try:
    while True:
        input("Click the 'Load More' button on the webpage and press Enter when you're ready to scrape the next batch of products.")
        scrape_products()
        continue_scraping = input("Do you want to continue scraping? (y/n): ")
        # Tolerate stray whitespace around the answer (e.g. " y").
        if continue_scraping.strip().lower() != 'y':
            break
except KeyboardInterrupt:
    # Interrupting the prompt should not discard what was already scraped;
    # fall through to the export below.
    print("Interrupted; saving the products scraped so far.")
finally:
    # Close the driver whether scraping finished normally or failed
    driver.quit()

# Convert the product data list into a pandas DataFrame
df = pd.DataFrame(product_data, columns=["Product Name", "Brand", "Reviews"])

# Display the DataFrame to check the data
print(df)

# Save the DataFrame to a CSV file (without the index column)
df.to_csv('ulta_perfume_data.csv', index=False)


Page loaded successfully.


Click the 'Load More' button on the webpage and press Enter when you're ready to scrape the next batch of products. 


Number of products scraped: 534
Scraped: Eilish Eau de Parfum
Scraped: COCO MADEMOISELLE Eau de Parfum Spray
Scraped: Donna Born In Roma Eau de Parfum
Scraped: Cloud Eau de Parfum
Scraped: Miss Dior Eau de Parfum
Scraped: Born in Roma Donna Perfume Gift Set
Scraped: Her Eau de Parfum
Scraped: CHANCE EAU TENDRE Eau de Parfum Spray
Scraped: Burberry Goddess Eau de Parfum
Scraped: Good Girl Blush Eau de Parfum
Scraped: MOD Vanilla Eau de Parfum
Scraped: Eilish No. 2 Eau de Parfum
Scraped: CHANCE EAU TENDRE Eau de Toilette Spray
Scraped: Libre Eau de Parfum
Scraped: Cosmic Kylie Jenner Eau de Parfum
Scraped: Sublime by Tory Burch Eau de Parfum
Scraped: Her Elixir de Parfum
Scraped: Paradoxe Eau de Parfum
Scraped: Black Opium Eau de Parfum
Scraped: Good Girl Eau de Parfum
Scraped: Light Blue Eau de Toilette
Scraped: J'adore Parfum D'eau Eau de Parfum
Scraped: Vault Collection
Scraped: CHANCE EAU FRAÎCHE Eau de Parfum Spray
Scraped: La Vie Est Belle Eau de Parfum
Scraped: LOVENOTES Pink Wood

Do you want to continue scraping? (y/n):  n


                              Product Name           Brand  \
0                     Eilish Eau de Parfum   Billie Eilish   
1    COCO MADEMOISELLE Eau de Parfum Spray          CHANEL   
2         Donna Born In Roma Eau de Parfum       Valentino   
3                      Cloud Eau de Parfum   Ariana Grande   
4                  Miss Dior Eau de Parfum            Dior   
..                                     ...             ...   
495              Montana Sky Spray Perfume        Pacifica   
496             FLORIST Eau de Parfum Mini  Ellis Brooklyn   
497        VANILLA MILK Eau de Parfum Mini  Ellis Brooklyn   
498     Toy Boy Eau de Parfum Travel Spray        Moschino   
499                     SALT Eau de Parfum  Ellis Brooklyn   

                               Reviews  
0    4.5 out of 5 stars ; 3431 reviews  
1     4.5 out of 5 stars ; 362 reviews  
2    4.8 out of 5 stars ; 6034 reviews  
3    4.3 out of 5 stars ; 2772 reviews  
4    4.6 out of 5 stars ; 9704 reviews  
..       