In [9]:
import sys
sys.path.append("../")
from Amazon.patch_class import *
import requests as rqs
from lxml import etree
from selenium import webdriver

In [4]:
class Sephora_Detail_Patch(PatchResult):
    def __init__(self, header: dict) -> None:
        super().__init__("https://www.sephora.com", header)

    def get_result(self, URL_pattern, **kwargs) -> Dict[str, str]:
        r = rqs.get(URL_pattern, headers=self.header)
        if (r.status_code!=200):
            print(f"Error: {r.status_code}")
            return {}
        self.node = etree.HTML(r.text)
        title = self._get_joined_text("//*[@id='productTitle']/text()", "")
        price = self._get_joined_text("//*[@id='corePrice_desktop']//*[@id='sns-base-price']/text()", "")
        style = self._get_joined_text("//*[@id='variation_style_name']/div/span/text()", "")
        details = self._get_details(remove_key=["UPC", "ASIN", "Batteries"])
        ratings = self._get_ave_ratings()
        comments = self._get_comments(**kwargs)
        attr_key = self._get_text_list("//table[@class='a-normal a-spacing-micro']//tr/td[1]/span/text()", "")
        attr_val = self._get_text_list("//table[@class='a-normal a-spacing-micro']//tr/td[2]/span/text()", "")
        attrs = dict(zip(attr_key, attr_val))
        result =  {
            "title": title,
            "price": price,
            "style": style,
            "ratings": ratings,
            "comments": comments,
        }
        result.update(attrs)
        result.update(details)
        # "attrs": attrs, "details": details,
        self.node = None
        return result

    def get_page_urls(self, page_pattern:str, max_page:int=30) -> List[str]:
        all_urls = []
        for page in range(max_page):
            print(f"\r---> Processing Search Page: {page+1}...", end='')
            r = rqs.get(page_pattern.format(page), headers=self.header)
            if (r.status_code != 200):
                print(f"Http Error: {r.status_code}")
                return all_urls
            self.node = etree.HTML(r.text)
            hrefs = self._get_text_list("//*[@data-component-type='s-search-result']//h2/a/@href", None)
            for url in hrefs:
                if (url != None):
                    all_urls.append(self.root_url + url)
        print(" done.")
        return all_urls


In [5]:
header = {
    "authority": "www.sephora.com",
    "scheme":"https",
    "accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.90",
    "accept-language": "en,zh-CN;q=0.9,zh;q=0.8,en-CN;q=0.7",
    "cookie": 'session-id=137-3328056-1763758; ubid-main=132-7960620-4393815; aws-ubid-main=744-3338385-3071068; session-id-eu=258-8382607-9461569; ubid-acbuk=260-8504814-0792203; x-main="MT30@1?PvLY0mSohFkFYBXnBXVcn9KLJ?p9K2KsKaDmRtRU61BuQnmm@jdeEAsSv"; at-main=Atza|IwEBIG4HmLRFnwFKYVcouxkJ-dvwOK6pKb9glsfFfXPyEffibKPpvhFdzlbdoCxeJBhQjCfqeoM8A_ZjvtFLNolhBS4XhuvSyDL2FDVucyKXdoKMtpKiJ6tSz2ZZG6iM55IQsVy4T8saTizg-AcmOPWSk_9SeZuZIE8l0-gupScTQEZHRRC_wG4Y_Y6EJbMLCZxBAydZ_1fs8CSGw0E5IHTFrnw-; sess-at-main="i33quMZwZ7wXMSkTPqgzpwZOL2h2/nb71aKj9MwsGqA="; sst-main=Sst1|PQGY5cbqIJUKRZkRR82Q1sKQCTnjzglU2LJqYztkzAM6Tw4H-pNkc6JUM-ruRRM9uAbz6aES8xAk8mvXT1Aig-wpXxykpntYDae6fB-Cv_6DUtfmdDee89vFTLSAthqEA_qJxnnXQFCuXw0AXTY8lcjPaRy2NaSxAodDsaCwPwOm2iKjaM3c6jOep7EXi41a4mwtZgWMUeIpVfnVsgvhdOySmeQJZXoglrmBlDIpnXF3zmI7-Kk76FlKH7s9-BBTja1T7w7X4bwHlvB4oOADYWiCbhLd6H5LMxGJW08p2gQKNPI; aws-userInfo-signed=eyJ0eXAiOiJKV1MiLCJrZXlSZWdpb24iOiJ1cy1lYXN0LTEiLCJhbGciOiJFUzM4NCIsImtpZCI6IjNhYWFiODU3LTRlZjItNGRjNi1iOTEwLTI4Y2IwYmZiNDM3ZSJ9.eyJzdWIiOiIiLCJzaWduaW5UeXBlIjoiUFVCTElDIiwiaXNzIjoiaHR0cDpcL1wvc2lnbmluLmF3cy5hbWF6b24uY29tXC9zaWduaW4iLCJrZXliYXNlIjoiN0xTNVloUGxIbWZ5OGV1QTA5U2hWcnpVN1wvWFJZVG1PcXhsNGhXQzE3MkU9IiwiYXJuIjoiYXJuOmF3czppYW06Ojk1OTcyNjAzMjI5OTpyb290IiwidXNlcm5hbWUiOiJjenEwMSJ9.D90z7xEnFo7TbU6r58EODRgMSoIj-IU0NxPW54xh8upmioSWwZunJx5qwfqlsB8Ppy7rWScYdhIN69pmzL1W5lHdbJW5_274fZSyCm5kbMi4vf4cCcMgX3W3GdJ4Hmmj; aws-userInfo={"arn":"arn:aws:iam::959726032299:root","alias":"","username":"czq01","keybase":"7LS5YhPlHmfy8euA09ShVrzU7/XRYTmOqxl4hWC172E\u003d","issuer":"http://signin.aws.amazon.com/signin","signinType":"PUBLIC"}; _RCRTX03-samesite=f567ca77ffeb11ec9ae6cb6f21e2e6d68ee2d79592214149ac1f0dac6da708ea; i18n-prefs=USD; lc-main=en_US; skin=noskin; csd-key=eyJ3YXNtVGVzdGVkIjp0cnVlLCJ3YXNtQ29tcGF0aWJsZSI6dHJ1ZSwid2ViQ3J5cHRvVGVzdGVkIjpmYWxzZSwidiI6MSwia2lkIjoiODIxMzRjIiwia2V5IjoiaWtzT3pmaHdYZGRCd0lJWDB0bXlJMkpvY1VRcXhPV0RwUmZrSEtsRG83NU5pcWNGcGpqNWUvRTl4ekxOZWdRUzN6dXU5NUkvUi9pSVlGTlJWNDVWR3pmN0lZdkFPdDMwZTBVSDhRYkp6Yk5Qb0Z1RFF5TFAwSnkvdzdabUhEaTBGa3N3cmFDYUhHL3ZkbWh1ZzNONEExSWRXa3QxWXFJTXdkVDJzYnVscVBoR1EraVBOYVdwWkhpZ0xidjl2V3hVaXVhV0FqMWFKRzdhMk9BeTBNdzd3YWt1WERWZkNDMlNXQmVGZzFlcGlSNGh6a3Nka3ovdVJ3RUQ0NzRFVWN5U2ZxVmhVNlpLeFBQTE04NWRUdy9mN096SWlOWGszUEtGVkNYbGlLK2VHVmJGU2hHUFV6NFdreDVCb0lCSlNITWx2U0ZzbkpxS2dJTjN0SkdDSnlFZDRRPT0ifQ==; _rails-root_session=ZUhRdHl1Vi9sUmJjU0xoUTg1VndKdS9xaHM3NlVoTjNVbHdnRU9LUU5wL1FObVRVd0V3Q0x3V1p6YU9NRjN4cDQzd2dPQ3VCRFZUdTVoelJvSHF0WFZuTXdIU2lvUmtJT2gydTVlUVBGeVBqN3E4QzBHS3pOYUNkUzNaNGMwYStEMURHaHhId2p1dXhwbTc3ZUZTQzhBYzZyelo5WFdaVGZweHJtK2k1Y0h4TUJBT1BISklxWUhFeGVhY0VqZjRoLS0rNHFmamNjSUk5aTNveDh6bVE1MmZnPT0=--1722e538c0108096427bdde65389fce1d9311e33; s_fid=138AFD1A879D8EB6-04CBB4AA6978C4A4; s_cc=true; csm-hit=tb:9YZE5Z96FMH7BHK5M81N+s-9YZE5Z96FMH7BHK5M81N|1665027781318&t:1665027781318&adb:adblk_yes; session-id-time=2082787201l; session-token=tRdG2mAHAs14hOY1f03rTaQqcYJI4ZQ592QeVdIWUpqXtPamw+H0EnA9XNc7lsjeMfuNpFtTXBJ28kQ7K+nxOdNqL+37UznaSDazcgvlVmos4KLRExcgUKXsl3XeOo/zt3dl7pyTZfDrrSscApi5lMNEmDKzx8YVWAJ/HfDcd+jNtBs4Px1pm3tgt60XLBkatAxjgDzoEUzClPEj3FogufAfvDVTFG8kF4W/3pn6off3xelnw92h7PNcXrkQCRq8AqRtxyLmI4Oet2GWOoAVTw==',
    "device-memory": "8",
    "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
    "sec-ch-platform": '"Windows"',
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
}

In [7]:
url = "https://www.sephora.com/shop/perfume?currentPage={}"
r = rqs.get(url.format(1), headers=header)
print(r.status_code)

200


In [10]:
driver = webdriver.Chrome("./chromedriver.exe")
driver.get("https://www.sephora.com/shop/perfume")

  driver = webdriver.Chrome("./chromedriver.exe")


In [12]:
elements = driver.find_elements("xpath", "//div[@data-comp='ProductGrid ']//a")
elements

[<selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="217c75b6-05df-4fe5-8134-76e5cf9c787b")>,
 <selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="06ad07c2-e94d-4a2f-8609-fd5410878d0a")>,
 <selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="ec33116c-688a-4977-8c09-be71f76fdd2e")>,
 <selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="38dfa247-0537-463f-8bb4-e9a1ee28564a")>,
 <selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="f4d7839a-ddf7-46fe-8b69-282fa8d1bd10")>,
 <selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="3fae1b6e-c4fe-409b-a934-0948b7e0a91e")>,
 <selenium.webdriver.remote.webelement.WebElement (session="f4fbcb80fe5d57fc15ff6fe8f9ad5a6f", element="afd5aea5-622e-4a3f-8bad-1b

In [14]:
driver.execute_script("arguments[0].scrollIntoView();",elements[1])