In [3]:
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium import webdriver
import plotly.graph_objects as go
import pandas as pd
import datetime
import re
import time
from functools import reduce
from os import getenv

from dotenv import load_dotenv

load_dotenv()


True

In [4]:
# Build the Chrome configuration shared by every browser session in this
# notebook. The switches are applied in the order listed.
# ('--headless' is currently disabled, so a visible window is opened.)
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--ignore-certificate-errors',
    '--ignore-ssl-errors',
    '--disable-popup-blocking',
    '--disable-translate',
    '--disable-extensions',
    '--disable-notifications',
    '--disable-infobars',
    '--disable-background-timer-throttling',
    '--disable-renderer-backgrounding',
    '--disable-device-discovery-notifications',
    '--disable-breakpad',
    '--disable-client-side-phishing-detection',
    '--disable-cast-streaming-hw-encoding',
    '--disable-cast-streaming-hw-decoding',
    '--disable-cast-streaming-dma-buf-video',
    '--disable-cast-streaming-vp9-video',
    '--disable-cast-streaming-h264-video',
    '--disable-cast-streaming-vp8-video',
    '--disable-gpu',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--disable-accelerated-jpeg-decoding',
    '--disable-accelerated-mjpeg-decode',
    '--disable-accelerated-video-decode',
):
    options.add_argument(chrome_flag)

# "detach" asks chromedriver to leave the browser window open when the driver
# process goes away, which is convenient while working interactively.
options.add_experimental_option("detach", True)

# Main browser session used for logging in and listing the shop's products.
# NOTE(review): `Service` is imported from selenium.webdriver.edge.service in
# the import cell although a Chrome driver is started here - confirm intended.
browser = webdriver.Chrome(
    options=options,
    service=Service(ChromeDriverManager().install()),
)


In [5]:
def _wait_driver(browser, timeout):
    """Create a ``WebDriverWait`` for ``browser`` with the given ``timeout``.

    Both arguments are passed straight through to ``WebDriverWait``.
    """
    waiter = WebDriverWait(browser, timeout)
    return waiter


def wait_element(browser, xpath, timeout=10) -> "WebElement | bool":
    """Wait until an element matching ``xpath`` is present and return it.

    Args:
        browser: Object handed to ``WebDriverWait``; callers in this notebook
            pass either a webdriver or an already-located element.
        xpath: XPath expression used to locate the element.
        timeout: Timeout forwarded to ``WebDriverWait`` (default 10).

    Returns:
        The located element, or ``False`` when the wait times out. Callers
        must check the result for truthiness before using it as an element.
    """
    locator = (By.XPATH, xpath)
    try:
        return _wait_driver(browser, timeout).until(
            EC.presence_of_element_located(locator))
    except TimeoutException:
        # Best-effort helper: report the miss and let the caller decide.
        print("Timed out waiting for page to load and select element " + xpath)
        return False


def wait_url(browser, url, timeout=10):
    """Wait until the browser's current URL equals ``url``.

    Returns ``True`` once the URL matches, or ``False`` (after printing a
    message) when the wait times out.
    """
    try:
        _wait_driver(browser, timeout).until(EC.url_to_be(url))
    except TimeoutException:
        print("Timed out waiting for url " + url)
        return False
    else:
        return True


def scroll_to_end(browser, pause=2):
    """Scroll to the bottom of the page until its height stops growing.

    After each scroll the function sleeps for ``pause`` seconds and compares
    ``document.body.scrollHeight`` with the previous value; it returns as
    soon as two consecutive readings are equal.

    Args:
        browser: Object exposing ``execute_script`` (a Selenium webdriver).
        pause: Seconds to sleep after each scroll before re-measuring the
            page height. Defaults to 2, the value previously hard-coded.
    """
    height_script = "return document.body.scrollHeight"
    last_height = browser.execute_script(height_script)
    while True:
        browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        # Give the page time to append more content before measuring again.
        time.sleep(pause)
        new_height = browser.execute_script(height_script)
        if new_height == last_height:
            return
        last_height = new_height


def _parse_sold(text):
    """Return the first run of digits in ``text`` as an int, or 0 if none."""
    found = re.search(r"(\d+)", text or "")
    return int(found.group(1)) if found else 0


def _parse_price(text):
    """Convert a price label to an int.

    Only the part after the last ``-`` is used; ``.`` characters and the
    ``₫`` sign are removed before the conversion.
    """
    upper_bound = text.split("-")[-1]
    return int(upper_bound.replace(".", "").replace("₫", ""))


def revenue(x, cookies):
    """Open a product page in its own browser and return sold * price.

    Args:
        x: URL of the product page.
        cookies: Iterable of cookie dicts; each one is added to the new
            browser session via ``add_cookie`` before the product is loaded.

    Returns:
        The product of the parsed "sold" number and the parsed price. The
        name, sold count, price and revenue are also printed.

    The browser started here is always shut down, even when scraping fails.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),
                              options=options)
    try:
        # Cookies can only be added for the domain that is currently loaded,
        # so open the home page first.
        driver.get("https://shopee.vn/")
        wait_element(driver, "//h1")
        for cookie in cookies:
            driver.add_cookie(cookie)

        # Reload the product page until the briefing drawer shows up.
        # NOTE(review): there is no upper bound on the number of reloads.
        while True:
            try:
                driver.get(x)
                loaded = wait_element(
                    driver,
                    "//div[contains(@class, 'product-briefing')]\
                    //div[@class='shopee-drawer']",
                    30
                )
                if loaded:
                    break
            except NoSuchElementException:
                pass
            print("Reloading", x)

        # get brief info
        brief = wait_element(
            driver,
            "//div[contains(@class, 'product-briefing')]",
        )

        # hover on ? button
        drawer = wait_element(brief, "//div[@class='shopee-drawer']")
        ActionChains(driver).move_to_element(drawer).perform()

        # NOTE(review): the XPath expressions below start with "//"; when
        # used on an element Selenium evaluates such paths against the whole
        # document rather than the element's subtree - confirm intended.
        try:
            # get drawer box after hover and read the sold number from it
            box = drawer.find_element(
                By.XPATH, "//div[@class='shopee-drawer__contents']")
            sold = _parse_sold(box.get_attribute("innerText"))
        except NoSuchElementException:
            sold = 0

        # get price
        try:
            # sale off price
            price_element = brief.find_element(
                By.XPATH,
                "//div[2]/div[3]/div/div[3]/div/div/div/div/div[2]/div[1]"
            )
        except NoSuchElementException:
            # original price
            price_element = brief.find_element(
                By.XPATH,
                "//div[2]/div[3]/div/div[3]/div/div/div/div/div[1]/div[1]"
            )

        price = _parse_price(price_element.get_attribute("innerText"))
        total = sold * price
        name = brief.find_element(By.XPATH, "//div[3]/div/div[1]/span").text
    finally:
        # quit() ends the whole browser session; doing it in ``finally``
        # keeps failed scrapes from leaving browser windows behind.
        driver.quit()

    # print info and return
    print(
        f"Name: {name}\tSold: {sold:,}\tPrice: {price:,}\tRevenue: {total:,}"
    )
    return total


In [6]:
# Login to get cookies

# username and password from .env file
# NOTE(review): USER is commonly predefined by Unix-like shells, and
# load_dotenv() by default does not override variables that already exist -
# confirm the value read here is the Shopee login and not the OS user name.
login_user = getenv('USER')
login_password = getenv('PASSWORD')
if not login_user or not login_password:
    # Fail with a clear message instead of an obscure error from send_keys.
    raise RuntimeError(
        "USER and PASSWORD must be set in the environment or the .env file")

browser.get("https://shopee.vn/buyer/login")

# wait for input with name loginKey
wait_element(browser, "//input[@name='loginKey']")

username = browser.find_element(By.XPATH, "//input[@name='loginKey']")
username.send_keys(login_user)
password = browser.find_element(By.XPATH, "//input[@name='password']")
password.send_keys(login_password)

# short fixed pause before clicking the login button
time.sleep(1)

# hit login with text "Đăng nhập"
login = browser.find_element(By.XPATH, "//button[text()='Đăng nhập']")

login.click()

# wait element h1
wait_element(browser, "//h1")

# get cookies; they are reused by the per-product browser sessions
cookies = browser.get_cookies()


In [7]:
# init url and shop name
url = "https://shopee.vn/{shop}?page={pageIndex}"
SHOP = "apple_flagship_store"

# Walk the shop's listing pages, starting at page 0, and collect the link of
# every product until a page yields no items.
href_list = []
pageIndex = 0
while True:
    # get url
    browser.get(url.format(shop=SHOP, pageIndex=pageIndex))

    # wait for list of items
    item_list = wait_element(
        browser,
        "//div[@class='shop-page_product-list']/div[@class='shop-all-product-view']",
    )
    if not item_list:
        # wait_element returns False on timeout; stop paginating instead of
        # failing with an AttributeError on the find_elements call below.
        print("No product list found on page", pageIndex)
        break

    # scroll to bottom to get all items
    scroll_to_end(browser)

    # get all items
    new_items = item_list.find_elements(
        By.XPATH,
        "//div[contains(@class, 'shop-search-result-view__item')]"
    )

    # If no new item, break
    if not new_items:
        break

    # get href of each item and add to list
    href_list += [item.find_element(By.XPATH, "./a").get_attribute('href')
                  for item in new_items]
    pageIndex += 1

browser.close()

print("Total items: ", len(href_list))

# Scrape every product in turn (one browser session each) and add up the
# per-product revenue.
total_revenue = sum(revenue(link, cookies) for link in href_list)
print(f"Total revenue at {datetime.datetime.now()}: {total_revenue}")

# Append a "timestamp,total" row so repeated runs build up a history.
with open('Exercise_5.csv', 'a') as f:
    print(datetime.datetime.now(), total_revenue, sep=",", file=f)


Total items:  25
Name: Apple 20W USB-C Power Adapter	Sold: 18,433	Price: 470,000	Revenue: 8,663,510,000
Timed out waiting for page to load and select element //h1
Name: [Nhập ELAP557 giảm 7% tối đa 350k] Apple AirPods 3rd gen lightning charge	Sold: 141	Price: 3,990,000	Revenue: 562,590,000
Name: [Nhập ELAP557 giảm 7% tối đa 350k] Apple AirPods with Charging Case 2nd gen	Sold: 28,071	Price: 2,790,000	Revenue: 78,318,090,000
Name: [Nhập ELMGFS5 giảm 5% tối đa 1.2 triệu] Apple iPhone 14 Pro 128GB	Sold: 967	Price: 25,390,000	Revenue: 24,552,130,000
Name: [Nhập ELMGFS5 giảm 5% tối đa 1.2 triệu] Apple iPhone 14 Pro Max 128GB	Sold: 2,026	Price: 27,490,000	Revenue: 55,694,740,000
Name: [Nhập ELAP557 giảm 7% tối đa 350k] Apple AirPods 3rd gen magsafe charge	Sold: 2,795	Price: 4,490,000	Revenue: 12,549,550,000
Timed out waiting for page to load and select element //h1
Name: [Nhập ELAP557 giảm 7% tối đa 350k] Apple AirPods Pro 2nd gen (2022)	Sold: 1,042	Price: 5,690,000	Revenue: 5,928,980,000
Nam