# In [1]:
import csv
import os
import pandas as pd
import re
import time
from datetime import datetime
from pathlib import *
import random
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import pytz
from pyvirtualdisplay import Display
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Off-screen X display (1920x1080) for the browser to render into.
display = Display(size=(1920, 1080), visible=0)
display.start()

# Run date: taken once at import time, in UTC, then expressed in Vietnam local
# time and formatted as YYYY-MM-DD.
local_tz = pytz.timezone('Asia/Ho_Chi_Minh')
now_utc = datetime.now(tz=pytz.utc)
now = now_utc.astimezone(local_tz).strftime('%Y-%m-%d')

# Chrome
def chrome_drive():
    """Create and return a Chrome WebDriver configured for scraping.

    Every option is collected on the ``ChromeOptions`` object *before* the
    driver is created: the options are only read when ``webdriver.Chrome`` is
    constructed, so anything added afterwards never reaches the browser.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    options = webdriver.ChromeOptions()

    # Headless mode is intentionally left off; the module starts a virtual
    # display for the browser instead.
    options.add_argument('--disable-gpu')  # Disable GPU hardware acceleration
    options.add_argument('--no-sandbox')  # Bypass OS security model
    options.add_argument('--disable-dev-shm-usage')  # Overcome limited resource problems

    # Reduce the visible traces of automation: drop the "enable-automation"
    # switch (the "controlled by automated test software" notice), do not load
    # the automation extension, and turn off the Blink AutomationControlled
    # feature.
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    options.add_argument("--disable-blink-features=AutomationControlled")

    # Maximize the window to avoid fingerprinting based on screen resolution.
    options.add_argument("start-maximized")

    # Do not stop on pages with a self-signed or otherwise invalid certificate.
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors=yes')

    # options.page_load_strategy = 'normal'  # 'none', 'eager', or 'normal'

    # "prefs" must be set exactly once: a second add_experimental_option call
    # with the same name replaces the earlier value instead of merging with it.
    prefs = {
        "disable-transitions": True,
        "profile.managed_default_content_settings.images": 2,  # 2 = block images
        "profile.default_content_setting_values.notifications": 2,  # 2 = block notifications
    }
    options.add_experimental_option("prefs", prefs)

    # Build the driver last so that all of the options above are applied.
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)



class MW:
    """Selenium scraper that records price and promotion data for product links.

    :meth:`mw` is the only entry point. It relies on names defined at module
    level outside this class: ``chrome_drive`` (browser factory) and ``now``,
    ``now_utc``, ``local_tz`` (run date / timezone).
    """

    # Words in the "productstatus" banner that mark a product as "not trading".
    _NOT_TRADING_KEYWORDS = ("ngừng", "tạm", "sắp")
    # Attempts per link before a page that keeps timing out is skipped.
    _MAX_ATTEMPTS_PER_LINK = 3

    def mw(self, link_check: list, restart_link: str, vnpay_flag = False):
        """Scrape every link from *restart_link* onwards and append rows to a CSV.

        For each page one row is appended to ``/content/<date>/2-mw-<date>.csv``
        (``;``-delimited) and one screenshot is saved under
        ``/content/<date>/img_mw``.

        Args:
            link_check: Ordered list of product page URLs.
            restart_link: Element of *link_check* to start from; links before
                it are skipped.
            vnpay_flag: When true the ``+VNPAY`` column is computed from the
                ``oc_vnpay_dict`` range table; otherwise it is parsed from the
                promotion text found on the page.

        Raises:
            ValueError: If *restart_link* is not contained in *link_check*.
        """
        # Resolve the starting point first so that a bad restart_link fails
        # before a browser is launched.
        start_index = link_check.index(restart_link)
        self.link_check = link_check
        self.restart_link = restart_link

        # Base folder for all output of this run.
        base_path = '/content'
        output_dir = os.path.join(base_path, f"{now}")
        output_img = os.path.join(output_dir, 'img_mw')

        csv_fields = ["Product_Name", "Ton_Kho", "Gia_Niem_Yet", "Gia_Khuyen_Mai",
                      "Chien_Gia", '+VNPAY', "Store_Chien", "Date",
                      "Khuyen_Mai", "Uu_Dai_Them", "Link", "screenshot_name"]

        def record(data_list):
            """Append the rows of *data_list* to today's CSV, writing the header once."""
            os.makedirs(output_dir, exist_ok=True)
            file_path = os.path.join(output_dir, f"2-mw-{now}.csv")

            # newline="" is what the csv module expects of files it writes to;
            # the explicit encoding keeps the Vietnamese text independent of
            # the machine's locale.
            with open(file_path, "a", newline="", encoding="utf-8") as file:
                writer = csv.DictWriter(file, fieldnames=csv_fields, delimiter=";")
                if os.stat(file_path).st_size == 0:
                    writer.writeheader()
                writer.writerows(data_list)

        def screen_shot(product_name):
            """Save a screenshot of the current page and return its file name."""
            os.makedirs(output_img, exist_ok=True)
            # Keep only word characters, '-' and '.' so the name is file-system safe.
            product_name_new = re.sub(r'[^\w\-\.]', '_', product_name).strip('. ')
            driver.fullscreen_window()
            driver.set_window_size(1920, 2080)
            # Scroll up by 100 pixels
            driver.execute_script("window.scrollBy(0, -100);")
            screenshot_name = f"{product_name_new}_{now_utc.astimezone(local_tz)}.png"
            driver.save_screenshot(f"{output_img}/{screenshot_name}")
            return screenshot_name

        def to_int_price(price_text):
            """Convert scraped price text such as '12.990.000₫' to an int."""
            return int(price_text.replace("đ", "").replace(".", "").replace("₫", ""))

        def vnpay_mw(km, khuyen_mai):
            """Return the price after the VNPAY/QR-code promotion found in *khuyen_mai*.

            *km* is either scraped price text (possibly suffixed with "soc") or
            an int. When the promotion text mentions neither "qrcode" nor
            "vnpay", or when *km* is not numeric, *km* is returned without any
            discount applied.
            """
            promotion_details = khuyen_mai.lower()
            if isinstance(km, str):
                km = km.replace(".", "").replace(",", "").replace("đ", "").replace("₫", "")

            if ("qrcode" not in promotion_details) and ("vnpay" not in promotion_details):
                return km

            # The price may already be an int (check_cash_discount returns
            # ints), so it must not be treated as text unconditionally.
            try:
                price = int(km.replace("soc", "")) if isinstance(km, str) else int(km)
            except ValueError:
                # Non-numeric price such as "not trading": nothing to discount.
                return km

            percent_discount = 100
            max_discount = 0
            for promotion in promotion_details.split("\n"):
                if ("qrcode" not in promotion) and ("vnpay" not in promotion):
                    continue

                if "%" in promotion:
                    # Percentage discount: the two characters in front of '%'.
                    try:
                        percent_discount = int(promotion[(promotion.index("%") - 2):promotion.index("%")])
                    except ValueError:
                        percent_discount = 100

                    # Cap of the discount: the text after "thêm", else after "đa".
                    try:
                        max_raw_discount = promotion[(promotion.index("thêm") + 5):]
                    except ValueError:
                        try:
                            max_raw_discount = promotion[(promotion.index("đa") + 3):]
                        except ValueError:
                            max_raw_discount = ""

                    try:
                        max_discount = int(
                            (max_raw_discount[:max_raw_discount.index("đ")]).replace(".", "").
                            replace("₫", "").replace("đ", "").replace("k", "000"))
                    except ValueError:
                        max_discount = 0
                else:
                    # Fixed-amount discount, optionally with a minimum order value.
                    percent_discount = 100
                    try:
                        try:
                            max_raw_discount = promotion[(promotion.index("đến") + 3):]
                        except ValueError:
                            max_raw_discount = promotion[(promotion.index("đa") + 3):]

                        if "đơn hàng từ" not in promotion:
                            max_discount = int(
                                max_raw_discount[:max_raw_discount.index("đ")]
                                .replace(".", "").replace(",", ""))
                        else:
                            condition = int(promotion[(promotion.index("đơn hàng từ") + 12):
                                                      promotion.index("triệu")]) * 1000000
                            if price >= condition:
                                max_discount = int(
                                    max_raw_discount[:max_raw_discount.index("k")]
                                    .replace(".", "").replace(",", "")) * 1000
                            else:
                                max_discount = 0
                    except ValueError:
                        # Unrecognised wording: report it and apply no discount
                        # rather than aborting the whole run.
                        print(f"Could not parse VNPAY promotion: {promotion}")
                        max_discount = 0

            # Both candidates are prices after discount; keeping the larger one
            # applies the smaller of the percentage discount and the cap.
            total_discount_1 = price * ((100 - percent_discount) / 100)
            total_discount_2 = price - max_discount
            return max(total_discount_1, total_discount_2)

        def vnpay_mw_table(km, oc_vnpay_dict: dict, now: str):
            """Return *km* minus the VNPAY amount of the table row whose range holds *km*.

            The table columns read here are ``Date_MW`` (last valid date, taken
            from the first row), ``Range_Low_MW``, ``Range_High_MW`` and
            ``VNPay_Amount_MW``. *km* is returned unchanged when it is
            non-numeric text, when the table is empty or expired, or when no
            range matches.
            """
            pattern = r'[₫,\.đ]|soc'  # Define a regex pattern to match '₫', ',', and 'soc'
            if isinstance(km, str):
                try:
                    km = int(re.sub(pattern, '', km))
                except ValueError:
                    return km

            # many time, the final price board has .0 at the end, so I need to take this step
            if isinstance(km, float):
                km = int(km)

            oc_vnpay_df = pd.DataFrame.from_dict(oc_vnpay_dict)
            if oc_vnpay_df.empty:
                return km

            current_date = pd.to_datetime(now).date()
            valid_date = pd.to_datetime(oc_vnpay_df.iloc[0]['Date_MW']).date()
            if current_date > valid_date:
                # Expired table: no VNPAY discount applies (previously this
                # path fell off the end of the function and returned None).
                return km

            try:
                for _, row in oc_vnpay_df.iterrows():
                    low_range = int(row['Range_Low_MW'])
                    high_range = int(row['Range_High_MW'])
                    if low_range <= km < high_range:
                        return km - int(row['VNPay_Amount_MW'])
            except ValueError:
                # A table cell that is not a number
                return 0
            return km

        def check_shock_price():
            """Return ``(gia_khuyen_mai, gia_niem_yet)`` for a "shock price" page, else None.

            The promotional price text is suffixed with "soc" so later steps
            can recognise it.
            """
            # Page layouts tried in order: (locator strategy, price, list price).
            layouts = (
                # From Feb 18th, 2023
                (By.CSS_SELECTOR, ".bs_price strong", ".bs_price em"),
                # started on Nov 9th, 2023
                (By.XPATH, ".//div[@class='bc_title']/div/strong", ".//div[@class='bc_title']/div/em"),
                # This class started on August 19th, 2022
                (By.CSS_SELECTOR, ".oo-left strong", ".oo-left em"),
            )
            for strategy, price_locator, list_price_locator in layouts:
                try:
                    gia_soc = driver.find_element(strategy, price_locator).text.replace(" *", "")
                    gia_niem_yet = driver.find_element(strategy, list_price_locator).text.replace(" *", "")
                except NoSuchElementException:
                    continue
                return gia_soc + "soc", gia_niem_yet
            return None

        def check_cash_discount(gia_khuyen_mai_raw):
            """Return the price after a cash discount offered on the page, if any.

            Returns an int when a discount option was evaluated or the
            pre-order block was inspected; returns *gia_khuyen_mai_raw*
            unchanged when no usable discount element is present.
            """
            option_km_them = driver.find_elements(By.CLASS_NAME, "label-radio")
            if option_km_them:
                # Direct discount: an option mentioning "giảm" and "đ" but
                # neither "ava" nor "xanh".
                pattern = r"(?=.*giảm)(?=.*đ)(?!.*ava)(?!.*xanh)"
                for option in option_km_them:
                    option_text = option.text
                    if not re.search(pattern, option_text, re.IGNORECASE):
                        # Keep looking: a later option may carry the discount.
                        continue
                    print("yes")
                    try:
                        # Parse the option that matched, not the first one on the page.
                        km_them = int(
                            option_text.replace("Giảm giá ", "")
                            .replace("đ", "").replace("*", "").replace(".", "").replace(",", ""))
                    except ValueError:
                        # This happens only when there is no cash discount, but the
                        # channel offers promotion in kind instead
                        km_them = 0
                    print(f"km_them: {km_them}")
                    gia_khuyen_mai_new = to_int_price(gia_khuyen_mai_raw) - km_them
                    print(f"gia_khuyen_mai: {gia_khuyen_mai_new}")
                    return gia_khuyen_mai_new
                return gia_khuyen_mai_raw

            # Pre-order products: the discount is described in the "promo" block.
            try:
                promo_element = driver.find_element(By.CLASS_NAME, "promo")
            except NoSuchElementException:
                return gia_khuyen_mai_raw

            base_price = to_int_price(gia_khuyen_mai_raw)
            for option in promo_element.text.split("\n"):
                print(option)
                if "triệu" not in option.lower():
                    continue
                try:
                    km_them_raw = option[(option.index("ngay") + 5):(option.index(" triệu"))].strip()
                    # The amount is given in millions and may use '.' or ','
                    # as the decimal mark ("2", "1.5", "1,5").
                    km_them = int(round(float(km_them_raw.replace(",", ".")) * 1000000))
                except ValueError:
                    # Line mentions "triệu" but not in the expected wording.
                    continue
                gia_khuyen_mai_new = base_price - km_them
                print(f"km_them: {km_them}")
                print(f"gia_khuyen_mai: {gia_khuyen_mai_new}")
                return gia_khuyen_mai_new
            return base_price

        def check_price(link):
            """Scrape the page currently loaded in the browser and return its CSV row."""

            def status_text():
                """Return the lower-cased "productstatus" text, or None when absent."""
                try:
                    return driver.find_element(By.CLASS_NAME, "productstatus").text.lower()
                except NoSuchElementException:
                    return None

            def check_ton_kho():
                """Return the stock status: "Yes", "No" or "not trading"."""
                status = status_text()
                if status is not None:
                    # This case will appear in case of preorder and not trading.
                    # Each keyword has to be tested on its own: `("a" or "b") in s`
                    # only ever tests "a".
                    if any(keyword in status for keyword in self._NOT_TRADING_KEYWORDS):
                        return "not trading"
                    return "No"
                try:
                    driver.find_element(By.CLASS_NAME, "buttonsub")  # Just for Pre-order
                except NoSuchElementException:
                    return "Yes"
                return "No"

            # 1 Check ton_kho
            ton_kho = check_ton_kho()

            # Try if the Special price applied for some special store exist
            try:
                green_price_box = driver.find_element(By.XPATH, "//div[@class='box-price green jsClick']")
                actions.click(green_price_box).perform()
                time.sleep(1.5)
                is_green_price_box_present = True
            except NoSuchElementException:
                is_green_price_box_present = False

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            try:
                product_name = driver.find_element(By.TAG_NAME, "h1").text.strip().replace("Mini", "mini")
            except NoSuchElementException:
                product_name = f"Please double check the link: {link}"
            else:
                to_remove_in_name = ["Điện thoại ", "Máy tính bảng ", "Laptop Apple ", "Tai nghe chụp tai Bluetooth ",
                                     "Tai nghe Bluetooth ", "Thiết bị định vị thông minh "]
                for item in to_remove_in_name:
                    product_name = product_name.replace(item, "")
            print(product_name)

            # 2A Check khuyen_mai
            try:
                khuyen_mai = driver.find_element(By.CLASS_NAME, "pr-item").text.replace(
                    "Xem chi tiết", ",").strip().replace("\n\n\n", "\n")
            except NoSuchElementException:
                khuyen_mai = ""

            # 2B Check thanh_toan_detail (payment promotions) and uu_dai_them
            thanh_toan_raw = soup.find_all("div", class_="campaign")
            if thanh_toan_raw:
                thanh_toan_detail = thanh_toan_raw[0].get_text().strip().replace("\n\n\n", "\n").replace("\n?", "")
            else:
                thanh_toan_detail = ""

            uu_dai_them_raw = soup.find_all('div', class_="promoadd")
            if uu_dai_them_raw:
                uu_dai_them = uu_dai_them_raw[0].get_text().strip().replace("\n\n\n", "\n")
            else:
                uu_dai_them = ""

            khuyen_mai = khuyen_mai + "\n" + thanh_toan_detail

            # 3 Check the price after cash promotion
            shock = check_shock_price()
            print("shock", shock)
            if shock:
                gia_khuyen_mai, gia_niem_yet = shock
                price_fighting = 0
                store_fighting = ""
            else:
                # 4 A gia_khuyen_mai: the special "giamsoc-ol-price" (Tet /
                # holiday occasions) wins over the regular price.
                try:
                    gia_khuyen_mai = driver.find_element(By.CLASS_NAME, "giamsoc-ol-price"). \
                        text.replace("Giá dự kiến: ", "").replace("*", "")
                except NoSuchElementException:
                    present_prices = driver.find_elements(By.CLASS_NAME, "box-price-present")
                    if present_prices:
                        # With a single price box it is the regular price; with
                        # more, the second box is used (presumably the first
                        # is the store-specific price - confirm against the site).
                        chosen = present_prices[0] if len(present_prices) == 1 else present_prices[1]
                        gia_khuyen_mai_raw = chosen.text.replace("Giá dự kiến: ", "").replace("*", "")
                        gia_khuyen_mai = check_cash_discount(gia_khuyen_mai_raw)
                    elif status_text() is not None:
                        # This happens when the product stop trading/not trading
                        gia_khuyen_mai = "not trading"
                    else:
                        # temporary used for MacBook Pro M2
                        try:
                            gia_khuyen_mai_raw = driver.find_element(By.CSS_SELECTOR, ".center b").text.replace(
                                'Giá bán:', '').strip()
                            gia_khuyen_mai = check_cash_discount(gia_khuyen_mai_raw)
                        except NoSuchElementException:
                            gia_khuyen_mai = 0

                # 4 B Check gia_niem_yet
                try:
                    gia_niem_yet = driver.find_element(By.CLASS_NAME, "box-price-old").text.replace(
                        "Giá dự kiến: ", "")
                except NoSuchElementException:
                    try:
                        gia_niem_yet = driver.find_element(By.CLASS_NAME, "box-price-present").text.replace(
                            "Giá dự kiến: ", "")
                    except NoSuchElementException:
                        if status_text() is not None:
                            gia_niem_yet = "not trading"
                        else:
                            # This created when the MacBook M2 start to preorder and have promotion
                            try:
                                gia_niem_yet = driver.find_element(By.CSS_SELECTOR, ".center b").text.replace(
                                    'Giá bán:', '')
                            except NoSuchElementException:
                                gia_niem_yet = 0

                # Price Fighting at specific stores
                if is_green_price_box_present:
                    fighting_prices = driver.find_elements(By.CLASS_NAME, "box-price-present")
                    if fighting_prices:
                        price_fighting_raw = fighting_prices[0].text.replace(
                            "Giá dự kiến: ", "").replace("*", "")
                        price_fighting = check_cash_discount(price_fighting_raw)
                    else:
                        price_fighting = 0
                    store_box = soup.find("div", class_="fstore expand")
                    if store_box is not None:
                        store_fighting = store_box.get_text()
                    else:
                        store_fighting = "No detail about stores where store_fighting is applied"
                else:
                    price_fighting = 0
                    store_fighting = ""

            # The VNPAY price is based on the store-specific price when there is one.
            price_to_calculate_vnpay = price_fighting if price_fighting != 0 else gia_khuyen_mai
            if vnpay_flag:
                # NOTE(review): oc_vnpay_dict is not defined in this part of the
                # file; it is expected to exist at module level - confirm.
                gia_khuyen_mai_vnpay = vnpay_mw_table(price_to_calculate_vnpay, oc_vnpay_dict, now)
            else:
                gia_khuyen_mai_vnpay = vnpay_mw(price_to_calculate_vnpay, khuyen_mai)

            screenshot_name = screen_shot(product_name)

            return {"Product_Name": product_name, "Ton_Kho": ton_kho, "Gia_Niem_Yet": gia_niem_yet,
                    "Gia_Khuyen_Mai": gia_khuyen_mai, "Chien_Gia": price_fighting,
                    '+VNPAY': gia_khuyen_mai_vnpay, "Store_Chien": store_fighting,
                    "Date": now,
                    "Khuyen_Mai": khuyen_mai, "Uu_Dai_Them": uu_dai_them, "Link": link,
                    'screenshot_name': screenshot_name}

        # The nested helpers above read `driver` and `actions` from this scope,
        # so rebinding them below (browser restart) is visible to all of them.
        driver = chrome_drive()
        actions = ActionChains(driver)
        try:
            for link in link_check[start_index:]:
                print(link)
                for _ in range(self._MAX_ATTEMPTS_PER_LINK):
                    try:
                        driver.get(link)
                        time.sleep(10)
                        record([check_price(link)])
                        break
                    except TimeoutException:
                        print(datetime.now())
                        print(f"Start again from link: {link}")
                        self.restart_link = link
                        # Retry the same link with a fresh browser instead of
                        # recursing into mw(), which would re-scrape the
                        # remaining links and leave the old browser running.
                        driver.quit()
                        driver = chrome_drive()
                        actions = ActionChains(driver)
                    except WebDriverException as error:
                        # Skip this page but say so; it used to be dropped silently.
                        print(f"Skipping link after browser error: {link} ({error})")
                        break
                else:
                    print(f"Giving up after {self._MAX_ATTEMPTS_PER_LINK} timeouts: {link}")
        finally:
            # Always release the browser, even when an unexpected error escapes.
            driver.quit()


# Notebook output: ModuleNotFoundError: No module named 'pyvirtualdisplay'