In [4]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv
import re

## Web scraping custom fragrances: Data Exploration and Personal Interest

In [5]:
def scrape_perfume_data(perfume_url, csv_path='le_labo_perfumes.csv'):
    """Scrape a single perfume page and append its details to a CSV file.

    Starts a fresh Chrome WebDriver session, loads ``perfume_url``, reads the
    perfume name, gender label, rating value and count, notes, launch year and
    main accords from the page, prints them, and appends one row to
    ``csv_path``. The browser session is always closed before returning.

    Args:
        perfume_url: URL of the perfume page to load.
        csv_path: Path of the CSV file the row is appended to. Defaults to
            ``'le_labo_perfumes.csv'``.

    Returns:
        None. Exceptions raised while loading or parsing the page, or while
        writing the row, are caught and reported with ``print`` instead of
        being propagated.
    """
    max_note_groups = 14       # upper bound on note containers probed via XPath
    note_wait_seconds = 10     # how long to wait for each note container
    accord_columns = 4         # number of accord columns written to the CSV

    # setup driver
    service = Service()
    options = Options()
    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.command_executor.set_timeout(1000)

        driver.get(perfume_url)
        time.sleep(3)  # fixed pause after navigation before querying the page

        # perfume name, turned into a slug: lower-cased, brand and gender
        # suffixes removed, spaces replaced by hyphens
        name_element = driver.find_element(By.CSS_SELECTOR, 'h1[itemprop="name"]')
        name = name_element.text.strip().lower()
        name = name.replace(" le labo", "").replace(" for women and men", "").replace(" ", "-")

        # gender (text of the <small> element inside the title)
        gender = driver.find_element(By.CSS_SELECTOR, 'h1[itemprop="name"] small').text.strip().lower()

        # ratings
        rating_value = driver.find_element(By.CSS_SELECTOR, 'span[itemprop="ratingValue"]').text.strip()
        rating_count = driver.find_element(By.CSS_SELECTOR, 'span[itemprop="ratingCount"]').text.strip()

        # notes: probe indexed containers 1..max_note_groups and stop at the
        # first index that cannot be read
        note_xpath = '//*[@id="pyramid"]/div[1]/div/div[2]/div[3]/div/div[{}]/div[2]'
        notes = []
        for idx in range(1, max_note_groups + 1):
            try:
                note_elements = WebDriverWait(driver, note_wait_seconds).until(
                    EC.presence_of_all_elements_located((By.XPATH, note_xpath.format(idx))))
                notes.extend(el.text.strip().lower() for el in note_elements)
            except Exception:
                # Best effort: a timeout (or any other lookup failure) ends the
                # notes scan. Unlike a bare ``except``, this does not swallow
                # KeyboardInterrupt/SystemExit.
                break

        # launch year
        description_text = driver.find_element(By.CSS_SELECTOR, 'div[itemprop="description"] p').text
        match = re.search(r'was launched in (\d{4})', description_text)
        launch_year = match.group(1) if match else "Unknown"

        # accords: pad with "Unknown" and keep exactly accord_columns entries
        accord_elements = driver.find_elements(By.CSS_SELECTOR, 'div.accord-bar')
        accords = [a.text.strip().lower() for a in accord_elements]
        mainaccords = (accords + ["Unknown"] * accord_columns)[:accord_columns]

        # print
        print(f"Name: {name}")
        print(f"Gender: {gender}")
        print(f"Rating Value: {rating_value}")
        print(f"Rating Count: {rating_count}")
        print(f"Launch Year: {launch_year}")
        print(f"Notes: {notes}")
        print(f"Main Accords: {mainaccords}")

        # add to csv (append mode; no header row is written here)
        with open(csv_path, mode='a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            # csv.writer stringifies non-string values, so the notes column
            # holds the Python repr of the list.
            writer.writerow([
                name, gender, rating_value, rating_count, launch_year,
                notes, *mainaccords
            ])

    except Exception as e:
        print(f"Error extracting data: {e}")

    finally:
        # Always release the browser, including on KeyboardInterrupt.
        driver.quit()

In [6]:
# scrape_perfume_data("https://www.fragrantica.com/perfume/Le-Labo/Vetiver-46-6328.html")