In [5]:
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from time import sleep
import random
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from tqdm import tqdm

##### 1. Load the functions **get_Data_On_Page** and **get_Data_Detail_1Product** from the file **Main_function.py**

In [6]:
from Main_function import get_Data_On_Page, get_Data_Detail_1Product

# Start a Chrome session driven by chromedriver.
# `path` is relative, so it is resolved against the current working directory.
path = 'chromedriver.exe'

# Register every Chrome command-line switch on a single Options object.
chrome_options = Options()
for flag in ('--no-sandbox', '--disable-notifications', '--disable-infobars'):
    chrome_options.add_argument(flag)

# `driver` is the shared browser handle passed to the crawl helpers in later cells.
service = Service(executable_path=path)
driver = webdriver.Chrome(service=service, options=chrome_options)

#### 2. Crawl data

##### Crawl the features shown on the search-result (listing) pages

In [8]:
# Collects whatever get_Data_On_Page returns for each page; combined in a later cell.
all_data = []

# Search-result pages 2 through 5 (four pages), each scraped with get_Data_On_Page.
page_numbers = range(2, 6)

for page in tqdm(page_numbers, desc="Crawling pages"):
    main_link = f'https://www.lazada.vn/catalog/?_keyori=ss&from=search_history&page={page}&q=tivi&spm=a2o4n.home-vn.search.2.19053bdc7EjPPP&sugg=tivi_0_1'
    driver.get(main_link)

    # Random 2-5 second pause to mimic human behavior and avoid getting blocked.
    pause_seconds = random.uniform(2,5)
    sleep(pause_seconds)

    tivi = get_Data_On_Page(driver)
    all_data.append(tivi)

Crawling pages: 100%|██████████| 4/4 [02:19<00:00, 34.89s/it]


In [9]:
# Stack the per-page results into one DataFrame, discarding the per-page indices.
data_02_05 = pd.concat(all_data, ignore_index=True)

# Relabel the rows 1..n instead of the default 0..n-1.
n_products = data_02_05.shape[0]
data_02_05.index = np.arange(n_products) + 1

data_02_05

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location
1,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 55 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,8.589.000 ₫,33% Off,1.1K Đã bán,(234),Hồ Chí Minh
2,tivi,GIAO HÀ NỘI - Smart Tivi Casper 4K 50 inch 50U...,https://www.lazada.vn/products/giao-ha-noi-sma...,5.389.000 ₫,46% Off,130 Đã bán,(7),Hà Nội
3,tivi,Thanh lý smart Tivi LG chính hãng 42inch siêu ...,https://www.lazada.vn/products/thanh-ly-smart-...,1.998.000 ₫,20% Off,6 Đã bán,(6),Hà Nội
4,tivi,Smart Tivi Coocaa Full HD 43 Inch 43S3U,https://www.lazada.vn/products/smart-tivi-cooc...,3.990.000 ₫,50% Off,,,Hà Nội
5,tivi,[6.6 HOTDEAL 3790K - Tặng Giá Treo TV DP243 ch...,https://www.lazada.vn/products/66-hotdeal-3790...,4.290.000 ₫,7% Off,52 Đã bán,(13),Bắc Ninh
...,...,...,...,...,...,...,...,...
156,tivi,Tivi oled LG OLED55A3PSA 55 inch 4K WebOS,https://www.lazada.vn/products/tivi-oled-lg-ol...,25.990.000 ₫,35% Off,,,Hà Nội
157,tivi,Google Tivi Sony 4K 43 inch K-43S30 - Mới 2024,https://www.lazada.vn/products/google-tivi-son...,15.990.000 ₫,20% Off,,,Hà Nội
158,tivi,Tivi Led Samsung UA43BU8000KXXV 43 inch 4K Ult...,https://www.lazada.vn/products/tivi-led-samsun...,10.890.000 ₫,11% Off,,,Hà Nội
159,tivi,Android Tivi Casper 43 inch 43FG5200,https://www.lazada.vn/products/android-tivi-ca...,4.590.000 ₫,45% Off,,,Hà Nội


##### Crawl the detailed feature data for each product

In [10]:
# Fields read from the result of get_Data_Detail_1Product for every product.
# The order here is the order of the keys in `detail_products`.
detail_fields = [
    "Price_original",
    "Ship_price",
    "Return",
    "Sale_rating",
    "Ship_on_time",
    "Chat_response",
    "One_star",
    "Two_star",
    "Three_star",
    "Four_star",
    "Five_star",
]

# One empty list per field; each list gains one entry per crawled product link.
detail_products = {field: [] for field in detail_fields}

# Visit every product link (tqdm shows progress) and file each field under its key.
for link in tqdm(data_02_05["Link"], desc = "Crawling product details"):
    details = get_Data_Detail_1Product(driver, link)
    for key, collected in detail_products.items():
        collected.append(details[key])

Crawling product details: 100%|██████████| 160/160 [1:28:37<00:00, 33.24s/it]


In [11]:
# Attach each collected detail list to data_02_05 as a column named after its key.
for column_name in detail_products:
    data_02_05[column_name] = detail_products[column_name]

# Save the result as CSV; the 1-based row index is not written.
data_02_05.to_csv("Page_02_05.csv", index=False)

data_02_05

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location,Price_original,Ship_price,Return,Sale_rating,Ship_on_time,Chat_response,One_star,Two_star,Three_star,Four_star,Five_star
1,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 55 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,8.589.000 ₫,33% Off,1.1K Đã bán,(234),Hồ Chí Minh,12.900.000 ₫,159.200 ₫,30 Ngày Trả Hàng Miễn Phí,Không đủ thông tin,100%,100%,[1],[0],[0],[1],[232]
2,tivi,GIAO HÀ NỘI - Smart Tivi Casper 4K 50 inch 50U...,https://www.lazada.vn/products/giao-ha-noi-sma...,5.389.000 ₫,46% Off,130 Đã bán,(7),Hà Nội,9.890.000 ₫,,,100%,Không đủ thông tin,100%,[0],[0],[0],[0],[7]
3,tivi,Thanh lý smart Tivi LG chính hãng 42inch siêu ...,https://www.lazada.vn/products/thanh-ly-smart-...,1.998.000 ₫,20% Off,6 Đã bán,(6),Hà Nội,2.500.000 ₫,198.200 ₫,,91%,90%,100%,[0],[0],[0],[0],[6]
4,tivi,Smart Tivi Coocaa Full HD 43 Inch 43S3U,https://www.lazada.vn/products/smart-tivi-cooc...,3.990.000 ₫,50% Off,,,Hà Nội,7.990.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
5,tivi,[6.6 HOTDEAL 3790K - Tặng Giá Treo TV DP243 ch...,https://www.lazada.vn/products/66-hotdeal-3790...,4.290.000 ₫,7% Off,52 Đã bán,(13),Bắc Ninh,4.590.000 ₫,Miễn phí,30 Ngày Trả Hàng Miễn Phí,96%,100%,99%,[0],[0],[0],[1],[13]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,tivi,Tivi oled LG OLED55A3PSA 55 inch 4K WebOS,https://www.lazada.vn/products/tivi-oled-lg-ol...,25.990.000 ₫,35% Off,,,Hà Nội,39.900.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
157,tivi,Google Tivi Sony 4K 43 inch K-43S30 - Mới 2024,https://www.lazada.vn/products/google-tivi-son...,15.990.000 ₫,20% Off,,,Hà Nội,,,,,,,[0],[0],[0],[0],[0]
158,tivi,Tivi Led Samsung UA43BU8000KXXV 43 inch 4K Ult...,https://www.lazada.vn/products/tivi-led-samsun...,10.890.000 ₫,11% Off,,,Hà Nội,12.190.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
159,tivi,Android Tivi Casper 43 inch 43FG5200,https://www.lazada.vn/products/android-tivi-ca...,4.590.000 ₫,45% Off,,,Hà Nội,8.290.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
