In [2]:
##SN01083035 Uwais Ilahi B Haja Badurudeen
## Install required dependencies (run these in your terminal or command prompt)
# pip install webdriver-manager  # Manages and downloads the correct ChromeDriver version automatically
# pip install selenium           # Installs the Selenium package for web automation

# Import necessary modules
from selenium import webdriver  # Controls the Chrome browser
from selenium.webdriver.common.by import By  # Helps locate elements on a webpage
import time  # Used to add delays in execution
import json  # Handles JSON data parsing
import csv  # Provides functionality for writing CSV files
import re  # Provides support for regular expressions

# Define the URL of the Lazada product page
url = "https://www.lazada.com.my/products/sambal-nyet-berapi-ori-dari-khairulaming-i2623278934-s11857920384.html?c=&channelLpJumpArgs=&clickTrackInfo=query%253Asambal%25252520nyet%253Bnid%253A2623278934%253Bsrc%253ALazadaMainSrp%253Brn%253Abf2de2d14f45c1d0a6c560d1677b35f8%253Bregion%253Amy%253Bsku%253A2623278934_MY%253Bprice%253A19.9%253Bclient%253Adesktop%253Bsupplier_id%253A300156873084%253Bbiz_source%253Ah5_internal%253Bslot%253A0%253Butlog_bucket_id%253A470687%253Basc_category_id%253A10003066%253Bitem_id%253A2623278934%253Bsku_id%253A11857920384%253Bshop_id%253A2002750%253BtemplateInfo%253A107880_D_E%2523-1_A3_C%2523&freeshipping=1&fs_ab=2&fuse_fs=&lang=en&location=Selangor&price=19.9&priceCompare=skuId%3A11857920384%3Bsource%3Alazada-search-voucher%3Bsn%3Abf2de2d14f45c1d0a6c560d1677b35f8%3BoriginPrice%3A1990%3BdisplayPrice%3A1990%3BsinglePromotionId%3A-1%3BsingleToolCode%3A-1%3BvoucherPricePlugin%3A0%3Btimestamp%3A1741419111378&ratingscore=4.959580838323354&request_id=bf2de2d14f45c1d0a6c560d1677b35f8&review=668&sale=3057&search=1&source=search&spm=a2o4k.searchList.list.0&stock=1"

# Pull the item ID out of the URL: the path above ends in "-i<digits>-s<digits>.html",
# and the regex captures the run of digits sitting between "i" and "-s".
match = re.search(r"i(\d+)-s", url)
if match is None:
    # re.search returns None when nothing matches; fail with a clear message
    # instead of an AttributeError from calling .group() on None.
    raise ValueError(
        f"Could not find an item ID (expected 'i<digits>-s' in the path) in URL: {url}"
    )
item_id = match.group(1)  # The captured digits, kept as a string

# Chrome launch flags for the scraping session
options = webdriver.ChromeOptions()
for chrome_flag in (
    "start-maximized",  # open the browser window maximized
    "--disable-blink-features=AutomationControlled",  # make the session less likely to be flagged as automated
):
    options.add_argument(chrome_flag)

# Start a Chrome instance driven by Selenium with the flags above
driver = webdriver.Chrome(options=options)

# Review-list endpoint for the extracted item; the page number is appended per request
base_url = f'https://my.lazada.com.my/pdp/review/getReviewList?itemId={item_id}&pageSize=5&filter=0&sort=0&pageNo='

# Load page 1 up front so that a CAPTCHA, if shown, can be solved by hand
driver.get(f"{base_url}1")

# Block here until the user confirms the CAPTCHA has been dealt with
input("Solve the CAPTCHA in the opened browser, then press Enter to continue...")

# Scrape the first 5 review pages into a CSV file.
# The whole run sits inside try/finally so the Chrome session is always shut
# down, even when a page fails to parse (e.g. the body is not JSON).
try:
    with open("sambal_nyet_review.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Buyer Name", "Review", "Date"])  # CSV header

        # Loop through the first 5 pages of reviews
        for page in range(1, 6):
            # Use a dedicated name so the product `url` defined earlier is not overwritten
            page_url = base_url + str(page)
            print(f"Fetching: {page_url}")  # Display the URL being fetched
            driver.get(page_url)  # Open the page in the browser
            time.sleep(3)  # Fixed pause to let the response render

            # The visible text of <body> is parsed as JSON; json.loads raises
            # json.JSONDecodeError if the page returned anything else.
            page_source = driver.find_element(By.TAG_NAME, "body").text
            data = json.loads(page_source)

            # "model" / "items" may be missing or null; fall back to empty containers
            model = data.get("model") or {}
            items = model.get("items") or []

            # Keep only entries that carry a "reviewTime" field
            reviews = [review for review in items if "reviewTime" in review]

            # Write one CSV row per review
            for review in reviews:
                # A key that is present but null would bypass the .get() default
                # and break .replace(), so substitute "N/A" explicitly.
                content = review.get("reviewContent", "N/A")
                if content is None:
                    content = "N/A"
                writer.writerow([
                    review.get("buyerName", "N/A"),  # Buyer name
                    content.replace("\n", " "),  # Review text with newlines flattened to spaces
                    review.get("reviewTime", "N/A"),  # Review date
                ])

    print("Reviews saved to sambal_nyet_review.csv")  # Only reached when every page succeeded
finally:
    # Always close the Selenium WebDriver so no browser process is left behind
    driver.quit()


Solve the CAPTCHA in the opened browser, then press Enter to continue... 


Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=2623278934&pageSize=5&filter=0&sort=0&pageNo=1
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=2623278934&pageSize=5&filter=0&sort=0&pageNo=2
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=2623278934&pageSize=5&filter=0&sort=0&pageNo=3
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=2623278934&pageSize=5&filter=0&sort=0&pageNo=4
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=2623278934&pageSize=5&filter=0&sort=0&pageNo=5
Reviews saved to sambal_nyet_review.csv
