# In [None]:
import csv
import os
import pandas as pd
import re
import time
from datetime import datetime
from pathlib import *
import random
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import pytz
from pyvirtualdisplay import Display
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Bring up an off-screen 1920x1080 virtual display at import time,
# before any browser is launched.
display = Display(visible=0, size=(1920, 1080))
display.start()


# Run date as YYYY-MM-DD in Vietnam local time: take the current instant
# in UTC, then shift it into the Asia/Ho_Chi_Minh zone before formatting.
local_tz = pytz.timezone('Asia/Ho_Chi_Minh')
now_utc = datetime.now(tz=pytz.utc)
now = now_utc.astimezone(local_tz).strftime('%Y-%m-%d')


# Chrome
def chrome_drive():
    """Create a Chrome WebDriver with the scraper's browser options applied.

    Every option is configured *before* the driver is instantiated: Chrome
    options are sent to the browser when the session is created, so anything
    added to ``options`` afterwards has no effect on the running browser.

    Returns:
        A started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.
    """
    options = webdriver.ChromeOptions()

    # Headless mode is intentionally left off.
    # NOTE(review): presumably because the module starts a virtual display
    # instead - confirm before re-enabling '--headless'.
    options.add_argument('--disable-gpu')  # Disable GPU hardware acceleration
    options.add_argument('--no-sandbox')  # Bypass OS security model
    options.add_argument('--disable-dev-shm-usage')  # Overcome limited resource problems

    # Reduce the signals that mark the browser as automated.
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    options.add_argument("--disable-blink-features=AutomationControlled")

    # Maximize the window to avoid fingerprinting based on screen resolution.
    options.add_argument("start-maximized")

    # Do not stop on invalid or self-signed certificates.
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors=yes')

    # wait for the page to be fully loaded before proceeding
    # options.page_load_strategy = 'normal'  # 'none', 'eager', or 'normal'

    # "prefs" is a single experimental option: a second add_experimental_option
    # call replaces the first, so all preferences are set in one dictionary.
    prefs = {
        "disable-transitions": True,
        # 2 = block: images are not loaded (they will be missing in screenshots).
        "profile.managed_default_content_settings.images": 2,
        # 2 = block: suppress notification permission prompts.
        "profile.default_content_setting_values.notifications": 2,
    }
    options.add_experimental_option("prefs", prefs)

    # Create the driver last so that all of the options above take effect.
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)


class DDV:
    """Scrape name, stock status, prices and promotions from product pages.

    Each visited page produces one row in a ';'-delimited CSV file and one
    screenshot. After ``ddv`` has been called, ``link_check`` holds the list
    of links and ``restart_link`` the link the latest run started from.
    """

    # Column order of the CSV output.
    _CSV_FIELDS = ["Product_Name", "Ton_Kho", "Gia_Niem_Yet", "Gia_Khuyen_Mai", "Date",
                   "Khuyen_Mai", "Link", "screenshot_name"]

    # Fragments removed from the product title, applied in this order.
    _NAME_SUFFIXES = (" Chính hãng (VN/A)", "Chính Hãng (VN/A)", " Chính hãng", " | Chính Hãng VN/A",
                      " Chính Hãng")

    # Characters stripped from the promotional price text.
    _PRICE_NOISE = str.maketrans("", "", "₫.,đ")

    # Message used wherever a value could not be read from the page.
    _CHECK_LINK = "Please double check the link"

    _XPATH_TITLE = "//div[@class='relative w-full flex-col justify-start']/h1"
    _XPATH_STOCK_NOTE = "//p[@class='text-center text-20 font-bold text-white']"
    _XPATH_STOCK_NOTE_ALT = "//p[@class='px-2 text-16 font-bold text-ddv']"
    _XPATH_OPTIONS = "(//p[@class='text-sm text-brow']/following-sibling::div/div)"
    _XPATH_PROMO_PRICE = "//p[@class='text-36 font-bold text-ddv']"
    _XPATH_LIST_PRICE = _XPATH_PROMO_PRICE + "/following-sibling::span"
    _XPATH_PROMOTION = "//div[@class='flex w-full flex-col items-start justify-start bg-white p-2']"

    def ddv(self, link_check: list, restart_link: str):
        """Scrape every link from ``restart_link`` to the end of ``link_check``.

        For each link the page is loaded, one row is appended to
        ``../output/<date>/4-ddv-<date>.csv`` and a screenshot is saved.
        If a ``TimeoutException`` is raised while handling a link, the browser
        is closed and scraping starts again from that same link with a fresh
        browser. There is no retry limit.

        Args:
            link_check: Product page URLs, in the order they are visited.
            restart_link: Element of ``link_check`` to start from.

        Raises:
            ValueError: If ``restart_link`` is not in ``link_check``.
        """
        self.link_check = link_check
        self.restart_link = restart_link

        # Resolve the start position before launching Chrome, so an unknown
        # restart_link fails immediately instead of leaving a browser running.
        start_index = self.link_check.index(self.restart_link)

        driver = chrome_drive()
        action = ActionChains(driver)

        # Base folder for screenshots (originally described as the Google Drive folder).
        base_path = '/content'
        output_img = os.path.join(base_path, f"{now}", 'img_ddv')

        # NOTE(review): the CSV is written relative to the working directory,
        # not under base_path like the screenshots - confirm this is intended.
        csv_dir = f"../output/{now}"
        csv_path = f"{csv_dir}/4-ddv-{now}.csv"

        def text_or_none(xpath):
            """Return the text of the first element matching ``xpath``, or None if absent."""
            try:
                return driver.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                return None

        def record(rows):
            """Append ``rows`` to the CSV file, writing the header if the file is empty."""
            os.makedirs(csv_dir, exist_ok=True)
            needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
            # newline="" is what the csv module requires of the file object;
            # utf-8 keeps the Vietnamese text independent of the system locale.
            with open(csv_path, "a", newline="", encoding="utf-8") as file:
                writer = csv.DictWriter(file, fieldnames=self._CSV_FIELDS, delimiter=";")
                if needs_header:
                    writer.writeheader()
                writer.writerows(rows)

        def screen_shot(product_name):
            """Save a screenshot of the current page.

            Returns:
                Tuple of (file-system-safe product name, screenshot folder).
            """
            os.makedirs(output_img, exist_ok=True)
            product_name_new = re.sub(r'[^\w\-\.]', '_', product_name).strip('. ')
            driver.fullscreen_window()
            driver.set_window_size(1920, 2080)
            # Scroll up by 100 pixels
            driver.execute_script("window.scrollBy(0, -100);")
            driver.save_screenshot(f"{output_img}/{product_name_new}_{now_utc.astimezone(local_tz)}.png")
            return product_name_new, output_img

        def check_stock(link):
            """Return "Yes"/"No" for the stock status, or a check-the-link message.

            The first status element is read, then the alternative one. When
            neither yields a recognised keyword the fallback message is
            returned, so the result is always a string.
            """
            note = text_or_none(self._XPATH_STOCK_NOTE)
            if note is not None:
                note = note.lower()
                if "ngay" in note:
                    return "Yes"
                if ("thông tin" in note) or ("ký" in note) or ("đặt trước" in note):
                    return "No"

            alt_note = text_or_none(self._XPATH_STOCK_NOTE_ALT)
            if alt_note is not None and "hết hàng" in alt_note.lower():
                return "No"

            return f"{self._CHECK_LINK}: {link}"

        def check_price(link):
            """Read the currently loaded page and return its CSV row as a dict."""
            title = text_or_none(self._XPATH_TITLE)
            if title is None:
                product_name = f"{self._CHECK_LINK}: {link}"
            else:
                product_name = title.strip()
            for suffix in self._NAME_SUFFIXES:
                product_name = product_name.replace(suffix, "")
            print(product_name)

            ton_kho = check_stock(link)
            if self._CHECK_LINK in ton_kho:
                # Status unknown for the current selection: click through the
                # remaining options (2nd to last) until one gives a status.
                more_options = driver.find_elements(By.XPATH, self._XPATH_OPTIONS)
                for position in range(2, len(more_options) + 1):
                    more_option = driver.find_element(By.XPATH, f"{self._XPATH_OPTIONS}[{position}]")
                    action.move_to_element(more_option).click().perform()
                    time.sleep(1)

                    ton_kho = check_stock(link)
                    if "Please" not in ton_kho:
                        break

            # Promotional price, with currency symbols and separators removed.
            promo_price = text_or_none(self._XPATH_PROMO_PRICE)
            if promo_price is None:
                gia_khuyen_mai = "N/A"
            else:
                gia_khuyen_mai = promo_price.translate(self._PRICE_NOISE)

            # Listed price; falls back to the promotional price when absent.
            list_price = text_or_none(self._XPATH_LIST_PRICE)
            gia_niem_yet = gia_khuyen_mai if list_price is None else list_price

            # Additional promotion text, if the page has any.
            promotion = text_or_none(self._XPATH_PROMOTION)
            khuyen_mai = "" if promotion is None else promotion.strip().replace('\n\n\n', '\n')

            # The (name, folder) tuple is written to the CSV as its string form.
            screenshot_name = screen_shot(product_name)

            return {"Product_Name": product_name, "Ton_Kho": ton_kho, "Gia_Niem_Yet": gia_niem_yet,
                    "Gia_Khuyen_Mai": gia_khuyen_mai, "Date": now, "Khuyen_Mai": khuyen_mai,
                    'Link': link, 'screenshot_name': screenshot_name}

        retry_from = None
        try:
            for link in self.link_check[start_index:]:
                print(link)
                try:
                    driver.get(link)
                    time.sleep(8)
                    record([check_price(link)])
                except TimeoutException:
                    print(datetime.now())
                    print(f"Start again from link: {link}")
                    # Stop using this browser; the retry below takes over.
                    retry_from = link
                    break
        finally:
            # Close the browser exactly once, whatever happened above.
            driver.quit()

        if retry_from is not None:
            self.restart_link = retry_from
            self.ddv(self.link_check, self.restart_link)


