In [1]:
# Import necessary libraries
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Configure Chrome: build the options object and register command-line switches
# that are passed to the browser when it is launched.
chrome_options = Options()
# Headless mode is intentionally left disabled here, so a visible browser window opens.
# chrome_options.add_argument("--headless")  # Run in headless mode (no UI)
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# Start the browser session. `driver` is notebook-global: the helper functions,
# the scraping loop and the final quit cell all use this same object.
service = Service('chromedriver')  # Ensure chromedriver is in your PATH or provide the full path
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the site's landing page
url = "https://www.souvanny.la"
driver.get(url)

# Block for up to 10 seconds until an element with class "cate_home" is present in
# the DOM (the wait raises a timeout error otherwise), then snapshot and parse the page.
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "cate_home")))
html = driver.page_source
soup = BeautifulSoup(html, 'lxml')

# Collect the href of every <a href=...> found inside each "cate_home" div, in
# document order. Duplicates are kept, and the hrefs are later handed verbatim to
# driver.get(), so they are presumably absolute URLs -- TODO confirm.
categories = soup.find_all('div', class_='cate_home')
links_list = [link['href'] for category in categories for link in category.find_all('a', href=True)]

# Parallel accumulators filled by the per-category scraping loop; index i of both
# lists describes the same product.
product_names = []
product_prices = []

def set_zoom_level(driver, zoom_factor):
    """Apply a CSS zoom to the current page's <body> through injected JavaScript.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver in this notebook).
        zoom_factor: Value interpolated into ``document.body.style.zoom`` (e.g. ``"50%"``).

    Returns:
        None. The result of the script execution is discarded.
    """
    zoom_script = f"document.body.style.zoom='{zoom_factor}'"
    driver.execute_script(zoom_script)

def scroll_and_collect_products(container_class, loader_class, scroll_pause_time=5):
    """Scroll a lazily-loading container until its scroll height stops growing.

    Despite the name, this function does not collect or return any products; it
    only drives the browser so that all items are rendered. The caller is expected
    to read ``driver.page_source`` afterwards. It operates on the notebook-global
    ``driver``.

    Args:
        container_class: Class name used with ``By.CLASS_NAME`` to locate the
            scrollable element.
        loader_class: Currently unused. Kept in the signature so existing calls
            keep working.
        scroll_pause_time: Seconds to sleep after each of the two scroll steps in
            an iteration. Defaults to 5, the value that was previously hard-coded.

    Returns:
        None. If the container cannot be found, a message is printed and the
        function returns without scrolling.
    """
    try:
        container = driver.find_element(By.CLASS_NAME, container_class)
    except NoSuchElementException as e:
        # NoSuchElementException must be imported at the top of the notebook;
        # without that import this handler itself raised NameError.
        print(f"Container not found: {e}")
        return

    last_height = driver.execute_script("return arguments[0].scrollHeight", container)

    while True:
        # Advance the container's own scroll position by one visible page
        driver.execute_script("arguments[0].scrollTop += arguments[0].clientHeight", container)
        time.sleep(scroll_pause_time)

        # Also scroll the window by the same amount so the end of the container
        # comes into view
        driver.execute_script("window.scrollBy(0, arguments[0].clientHeight)", container)
        time.sleep(scroll_pause_time)

        new_height = driver.execute_script("return arguments[0].scrollHeight", container)
        print(f"Last Height: {last_height}, New Height: {new_height}")

        # An unchanged scrollHeight after a full scroll + pause is treated as
        # "nothing more was loaded"
        if new_height == last_height:
            print("Reached the end of the container.")
            break

        last_height = new_height

# Visit every category page and harvest the name/price of each product card
for link in links_list:
    driver.get(link)
    # Block (up to 10 s) until the scrollable product area is present
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "cc-content"))
    )

    # Shrink the page to 50% so more content fits on the screen
    set_zoom_level(driver, "50%")

    # Keep scrolling until the container stops growing
    scroll_and_collect_products("cc-content", ".loader")

    # Snapshot the fully scrolled DOM and parse it
    html = driver.page_source
    soup = BeautifulSoup(html, 'lxml')

    # Each product card contributes one entry to both accumulator lists
    products = soup.find_all('div', class_='grid-group-item')
    for product in products:
        name_div = product.find('div', class_='c0122')
        price_div = product.find('div', class_='c0123')
        # Cards missing either field are skipped entirely so the lists stay aligned
        if name_div is None or price_div is None:
            continue
        product_name = name_div.text.strip()
        product_price = price_div.text.strip().replace("                                                   ", "")
        product_names.append(product_name)
        product_prices.append(product_price)

Ketinggian Terakhir: 1136, Ketinggian Baru: 1188
Ketinggian Terakhir: 1188, Ketinggian Baru: 2256
Ketinggian Terakhir: 2256, Ketinggian Baru: 2256
Mencapai akhir kontainer.
Ketinggian Terakhir: 1136, Ketinggian Baru: 1188
Ketinggian Terakhir: 1188, Ketinggian Baru: 2256
Ketinggian Terakhir: 2256, Ketinggian Baru: 2256
Mencapai akhir kontainer.
Ketinggian Terakhir: 576, Ketinggian Baru: 576
Mencapai akhir kontainer.
Ketinggian Terakhir: 1136, Ketinggian Baru: 1188
Ketinggian Terakhir: 1188, Ketinggian Baru: 2256
Ketinggian Terakhir: 2256, Ketinggian Baru: 2256
Mencapai akhir kontainer.
Ketinggian Terakhir: 1136, Ketinggian Baru: 1188
Ketinggian Terakhir: 1188, Ketinggian Baru: 2256
Ketinggian Terakhir: 2256, Ketinggian Baru: 2256
Mencapai akhir kontainer.
Ketinggian Terakhir: 1136, Ketinggian Baru: 1188
Ketinggian Terakhir: 1188, Ketinggian Baru: 2256
Ketinggian Terakhir: 2256, Ketinggian Baru: 2256
Mencapai akhir kontainer.
Ketinggian Terakhir: 1136, Ketinggian Baru: 1188
Ketinggian Te

In [3]:
# Scraping is finished: quit the WebDriver started in the setup cell.
# After this call `driver` can no longer be used; re-run the setup cell to scrape again.
driver.quit()

In [4]:
# Assemble the two scraped lists into a two-column table (one row per product)
df = pd.DataFrame(
    data={'Product Name': product_names, 'Product Price': product_prices},
)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,Product Name,Product Price
0,+73749 ກະໂລ້ປູພື້ນ ພາດມີນາ ກອດ ສີຂາວ (ເຊລະມິກ)...,"135,413 ກີບ/ກ່ອງ"
1,+73846 ກະໂລ້ຕິດຝາ ໄມລ່າ ຂາວ (HYG) (Random) (ເຊ...,"278,587 ກີບ/ກ່ອງ"
2,+73849 ກະໂລ້ຕິດຝາ ແຊັກວູດ ນ້ຳຕານແດງ (FT) (ເຊລະ...,"278,587 ກີບ/ກ່ອງ"
3,+73848 ກະໂລ້ຕິດຝາ ແຊັກວູດ ນ້ຳຕານທອງ (FT) (ເຊລະ...,"278,587 ກີບ/ກ່ອງ"
4,+44662 ກະໂລ້ຕິດຝາ ໄມ້ຊາຕີ ນ້ຳຕານແດງ (ເຊລະມິກ) ...,"278,587 ກີບ/ກ່ອງ"
...,...,...
742,+37976 (4Z) ນ້ຳຢາເຮັດຄວາມສະອາດ ເກດຟໍລັດ ເຟີເຟກ...,"57,022 ກີບ/ຕຸກ"
743,+37975 (4Z) ນ້ຳຢາເຮັດຄວາມສະອາດ ເກດລາເວັນເດີ້ ມ...,"57,022 ກີບ/ຕຸກ"
744,+38005 (5Z) ນ້ຳຢາດັນຝຸ່ນ ປັ່ນເງົາ (1ລິດ) (12ຕຸ...,"105,055 ກີບ/ຕຸກ"
745,+37996 (5Z) ນ້ຳຢາເຊັດແວ່ນ ເງົາ (ຟ໋ອກກີ້) (500m...,"50,908 ກີບ/ຕຸກ"
