In [1]:
import os
import pandas as pd
from random import randint
from time import sleep
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
from tqdm.notebook import tqdm

In [2]:
# Configure and launch the Chrome session used by the scraping cells below.
options = uc.ChromeOptions()

# Chrome profile preferences for downloads: save into the current working
# directory without prompting, and hand PDFs off instead of opening them
# in the built-in viewer (per the preference names; behaviour is Chrome's).
options.add_experimental_option(
    "prefs",
    {
        "download.default_directory": os.getcwd(),
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "plugins.always_open_pdf_externally": True,
    },
)

for chrome_flag in ("--disable-gpu", "--disable-popup-blocking"):
    options.add_argument(chrome_flag)

# Headless mode is requested both through the options attribute and through
# the command-line switch; the attribute is set between the flag groups so the
# resulting argument order is unchanged.
options.headless = True
for chrome_flag in ("--headless", "window-size=1920,1080"):
    options.add_argument(chrome_flag)

# NOTE: this name is reused by later cells as the live driver instance.
webdriver = uc.Chrome(options=options)

# Allow up to 120 seconds for a page load before Selenium raises a timeout.
webdriver.set_page_load_timeout(120)
webdriver.maximize_window()

In [3]:
# Load the table of product links to scrape (one row per product, with the
# `category` and `product_link` columns used by the scraping loop).
product_category_links_df = pd.read_csv(
    filepath_or_buffer="./product_category_links.csv"
)

# Preview the first ten rows.
product_category_links_df.head(n=10)

Unnamed: 0,category,product_link
0,Star PC,https://www.startech.com.bd/amd-ryzen-5-5600g-...
1,Star PC,https://www.startech.com.bd/amd-ryzen-5-5600g-...
2,Star PC,https://www.startech.com.bd/intel-12th-gen-cor...
3,Star PC,https://www.startech.com.bd/amd-ryzen-7-5700g-...
4,Star PC,https://www.startech.com.bd/amd-ryzen-5-8500g-...
5,Star PC,https://www.startech.com.bd/intel-13th-gen-cor...
6,Gaming PC,https://www.startech.com.bd/amd-ryzen-5-3400g-...
7,Gaming PC,https://www.startech.com.bd/amd-ryzen-5-2400g-...
8,Gaming PC,https://www.startech.com.bd/amd-ryzen-5-5600gt...
9,Gaming PC,https://www.startech.com.bd/amd-ryzen-5-5500-g...


In [4]:
# Scrape one batch of product pages and collect the results column-wise.
#
# `product_details` is re-created on every run of this cell, so it always
# holds exactly one batch: the rows `start` .. `end - 1` of
# `product_category_links_df`. Products whose page fails to load or parse are
# reported and skipped; they contribute no row to the batch.
product_details = {"name": [], "price": [], "category": [], "specification": []}

start = 2281
end = 2500

# Clamp the batch to the rows that actually exist. The link table is read with
# the default 0..n-1 index, so a label >= len(table) would make `.loc` raise
# KeyError and abort the whole batch.
batch_end = min(end, len(product_category_links_df))
if batch_end < end:
    print(
        f"Link table has only {len(product_category_links_df)} rows; "
        f"clamping end from {end} to {batch_end}"
    )

for idx in tqdm(range(start, batch_end)):

    product_link = product_category_links_df.loc[idx, "product_link"]
    category = product_category_links_df.loc[idx, "category"]

    try:
        # The page load lives inside the try so that a timeout or navigation
        # error skips this one product instead of stopping the loop.
        webdriver.get(product_link)
        sleep(randint(1, 2))  # short random pause between requests

        product_name = webdriver.find_element(
            By.XPATH, '//h1[@class="product-name"]'
        ).text.strip()

        price_text = webdriver.find_element(
            By.XPATH,
            '//div[@class="product-price-options"]/label[contains(@class, "cash")]/span[@class="price-new" or @class="price"]',
        ).text
        # Drop thousands separators and the currency sign, then keep only the
        # text before the first space (presumably the element can contain more
        # than one space-separated value - TODO confirm against the site).
        product_price = price_text.replace(",", "").replace("৳", "").strip()
        product_price = product_price.split(" ", 1)[0].strip()

        spec_rows = webdriver.find_elements(
            By.XPATH, '//section[@id="specification"]//tbody/tr'
        )

        # One "name: value" line per specification row; the first <td> is the
        # name and the last <td> is the value (multi-line values are joined
        # with ". ").
        spec_lines = []
        for spec_row in spec_rows:
            cells = spec_row.find_elements(By.TAG_NAME, "td")
            if not cells:
                # A row without <td> cells has nothing to record; skipping it
                # avoids an IndexError that would discard the whole product.
                continue
            spec_name = cells[0].text.strip()
            specs = cells[-1].text.replace("\n", ". ").strip()
            spec_lines.append(f"{spec_name}: {specs}\n")
        product_spec = "".join(spec_lines)

        # Append only after every field was extracted, so the four lists
        # always stay the same length.
        product_details["name"].append(product_name)
        product_details["price"].append(product_price)
        product_details["category"].append(category)
        product_details["specification"].append(product_spec)

    except Exception as e:
        # Best-effort scraping: report the failing product and move on.
        print(f"Error at product link: {product_link}\n")
        print(f"Error at idx: {idx}, error: {e}")

  0%|          | 0/219 [00:00<?, ?it/s]

In [5]:
# Turn the column-wise lists collected by the scraping loop into a table,
# one row per successfully scraped product.
product_details_df = pd.DataFrame(data=product_details)

# Preview the first ten scraped products.
product_details_df.head(n=10)

Unnamed: 0,name,price,category,specification
0,ZTE nubia Neo 5G (8/256GB),18200,Phone,Size: 6.6 inches\nType: IPS LCD\nResolution: F...
1,XP-Pen Star-G430S Ultra-Thin Digital Drawing G...,2800,Graphics Tablet,Stylus Pen: PN01_B/P01 Passive pen\nActive are...
2,Huion H430P Graphics Tablet,2900,Graphics Tablet,Stylus Pen: Yes\nActive area: 121.9 x 76.2mm\n...
3,XP-Pen Star-G640 Ultrathin Digital Drawing Gra...,3450,Graphics Tablet,Stylus Pen: PN01_B/P01 Passive pen\nActive are...
4,Huion Inspiroy RTE-100 Graphics Drawing Tablet,3550,Graphics Tablet,Stylus Pen: PW400\nActive area: 121.9 x 76.2mm...
5,Huion H640P Graphics Tablet,3900,Graphics Tablet,Stylus Pen: Yes.\nInterface: Micro USB\nCompat...
6,XP-Pen Deco 640 Graphics Tablet,4200,Graphics Tablet,Stylus Pen: P01(16K)\nActive area: 16cm x 9cm\...
7,"XP-Pen Deco Mini 7 4.37"" Drawing Graphics Tablet",5400,Graphics Tablet,Stylus Pen: Battery-free stylus\nActive area: ...
8,Huion Inspiroy RTS-300 Graphics Drawing Tablet,5500,Graphics Tablet,Stylus Pen: Yes\nActive area: 160 x 100mm\nTil...
9,XP-Pen Star G960S Plus Digital Drawing Graphic...,5550,Graphics Tablet,Stylus Pen: PH2 Battery-free Stylus (featuring...


In [8]:
# Write this batch to its own workbook; the file name carries the requested
# `start`/`end` bounds so separate batches do not overwrite each other.
product_details_df.to_excel(
    excel_writer=f"./product_details_{start}_{end}.xlsx",
    index=False,
    engine="xlsxwriter",
)