In [1]:
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from time import sleep
import random
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from tqdm import tqdm

##### 1. Load the functions **get_Data_On_Page** and **get_Data_Detail_1Product** from the file **Main_function.py**

In [2]:
from Main_function import get_Data_On_Page, get_Data_Detail_1Product

# Chrome WebDriver session for the Lazada "tivi" crawl; `driver` is reused by
# the crawl cells further down.
# Relative path to the ChromeDriver executable.
path = 'chromedriver.exe'

# Command-line flags handed to Chrome at start-up.
chrome_options = Options()
for chrome_flag in ('--no-sandbox', '--disable-notifications', '--disable-infobars'):
    chrome_options.add_argument(chrome_flag)

# NOTE(review): none of the visible cells call driver.quit() - confirm the
# session is closed elsewhere once crawling is finished.
service = Service(executable_path=path)
driver = webdriver.Chrome(service=service, options=chrome_options)

#### 2. Crawl data

##### Crawl the features displayed on the search-result (listing) pages

In [3]:
# One entry per crawled search-result page, in crawl order.
all_data = []

# Build the search URL for each of the 5 pages (11-15), open it, and scrape it
# with get_Data_On_Page.
for page in tqdm(range(11, 16), desc="Crawling pages"):
    main_link = f'https://www.lazada.vn/catalog/?_keyori=ss&from=search_history&page={page}&q=tivi&spm=a2o4n.home-vn.search.6.4d423bdcueaQbu&sugg=tivi_4_1'
    driver.get(main_link)

    # Random 2-5 second pause to mimic human behavior and avoid getting blocked.
    pause_seconds = random.uniform(2,5)
    sleep(pause_seconds)

    all_data.append(get_Data_On_Page(driver))

Crawling pages:   0%|          | 0/5 [00:00<?, ?it/s]

Crawling pages: 100%|██████████| 5/5 [02:24<00:00, 28.85s/it]


In [4]:
# Stack the per-page results into a single table, then label the rows 1..N
# instead of the 0-based labels produced by ignore_index=True.
data_11_15 = pd.concat(all_data, ignore_index=True)
one_based_labels = np.arange(1, data_11_15.shape[0] + 1)
data_11_15.index = one_based_labels
data_11_15

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location
1,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 55 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,8.589.000 ₫,33% Off,1.1K Đã bán,(234),Hồ Chí Minh
2,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 50 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,8.189.000 ₫,28% Off,103 Đã bán,(26),Hồ Chí Minh
3,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 65 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,11.589.000 ₫,27% Off,324 Đã bán,(64),Hồ Chí Minh
4,tivi,[ 6.6 HOTDEAL 7290K - Tặng Giá Treo TV DP243 c...,https://www.lazada.vn/products/66-hotdeal-7290...,7.847.300 ₫,44% Off,1.7K Đã bán,(682),Hồ Chí Minh
5,tivi,[ 6.6 HOTDEAL 5290K - Tặng Giá Treo TV DP243 c...,https://www.lazada.vn/products/66-hotdeal-5290...,6.492.000 ₫,38% Off,494 Đã bán,(142),Hồ Chí Minh
...,...,...,...,...,...,...,...,...
196,tivi,Tivi Sony Mới Nhất 2024 - BRAVIA 3 43 Inch | 4...,https://www.lazada.vn/products/tivi-sony-moi-n...,14.790.000 ₫,15% Off,,(1),Bình Dương
197,tivi,Google Tivi TCL 4K 65 inch 65P745 - Hàng chính...,https://www.lazada.vn/products/google-tivi-tcl...,10.190.000 ₫,,,,Bình Dương
198,tivi,Tivi Led TCL 32S5400A 32 inch Android TV,https://www.lazada.vn/products/tivi-led-tcl-32...,4.490.000 ₫,43% Off,,,Hà Nội
199,tivi,[6.6 HOTDEAL 27990K - Tặng Loa S522W cho 20 đơ...,https://www.lazada.vn/products/66-hotdeal-2799...,37.990.000 ₫,5% Off,,(1),Hồ Chí Minh


##### Crawl detailed feature data for each product

In [5]:
# Names of the detail fields read from each product's result; each one gets its
# own list, filled in the same order as the links in data_11_15.
detail_fields = (
    "Price_original",
    "Ship_price",
    "Return",
    "Sale_rating",
    "Ship_on_time",
    "Chat_response",
    "One_star",
    "Two_star",
    "Three_star",
    "Four_star",
    "Five_star",
)
detail_products = {field: [] for field in detail_fields}

# Crawl every product link (tqdm shows the progress) and file each returned
# value under its field.
for product_link in tqdm(data_11_15["Link"], desc = "Crawling product details"):
    details = get_Data_Detail_1Product(driver, product_link)
    for field, collected in detail_products.items():
        collected.append(details[field])

Crawling product details: 100%|██████████| 200/200 [1:46:09<00:00, 31.85s/it]


In [6]:
# Attach each list of crawled detail values to data_11_15 as a new column.
for column_name in detail_products:
    data_11_15[column_name] = detail_products[column_name]

# Write the combined table to CSV (row labels are not written), then display it.
data_11_15.to_csv("Page_11_15.csv", index=False)
data_11_15

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location,Price_original,Ship_price,Return,Sale_rating,Ship_on_time,Chat_response,One_star,Two_star,Three_star,Four_star,Five_star
1,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 55 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,8.589.000 ₫,33% Off,1.1K Đã bán,(234),Hồ Chí Minh,12.900.000 ₫,159.200 ₫,30 Ngày Trả Hàng Miễn Phí,Không đủ thông tin,100%,100%,[1],[0],[0],[1],[232]
2,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 50 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,8.189.000 ₫,28% Off,103 Đã bán,(26),Hồ Chí Minh,,,,,,,[0],[0],[0],[0],[0]
3,tivi,[SALE 6.6] Smart Tivi Samsung 4K UHD 65 inch U...,https://www.lazada.vn/products/sale-66-smart-t...,11.589.000 ₫,27% Off,324 Đã bán,(64),Hồ Chí Minh,15.900.000 ₫,258.200 ₫,30 Ngày Trả Hàng Miễn Phí,Không đủ thông tin,100%,100%,[1],[0],[0],[2],[61]
4,tivi,[ 6.6 HOTDEAL 7290K - Tặng Giá Treo TV DP243 c...,https://www.lazada.vn/products/66-hotdeal-7290...,7.847.300 ₫,44% Off,1.7K Đã bán,(682),Hồ Chí Minh,13.990.000 ₫,Miễn phí,30 Ngày Trả Hàng Miễn Phí,96%,100%,100%,[18],[4],[6],[12],[641]
5,tivi,[ 6.6 HOTDEAL 5290K - Tặng Giá Treo TV DP243 c...,https://www.lazada.vn/products/66-hotdeal-5290...,6.492.000 ₫,38% Off,494 Đã bán,(142),Hồ Chí Minh,10.490.000 ₫,Miễn phí,30 Ngày Trả Hàng Miễn Phí,96%,100%,100%,[2],[1],[1],[1],[137]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,tivi,Tivi Sony Mới Nhất 2024 - BRAVIA 3 43 Inch | 4...,https://www.lazada.vn/products/tivi-sony-moi-n...,14.790.000 ₫,15% Off,,(1),Bình Dương,17.400.000 ₫,,30 Ngày Trả Hàng Miễn Phí,98%,Không đủ thông tin,98%,[0],[0],[0],[0],[1]
197,tivi,Google Tivi TCL 4K 65 inch 65P745 - Hàng chính...,https://www.lazada.vn/products/google-tivi-tcl...,10.190.000 ₫,,,,Bình Dương,,,30 Ngày Trả Hàng Miễn Phí,96%,100%,100%,[0],[0],[0],[0],[0]
198,tivi,Tivi Led TCL 32S5400A 32 inch Android TV,https://www.lazada.vn/products/tivi-led-tcl-32...,4.490.000 ₫,43% Off,,,Hà Nội,7.900.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
199,tivi,[6.6 HOTDEAL 27990K - Tặng Loa S522W cho 20 đơ...,https://www.lazada.vn/products/66-hotdeal-2799...,37.990.000 ₫,5% Off,,(1),Hồ Chí Minh,39.990.000 ₫,Miễn phí,30 Ngày Trả Hàng Miễn Phí,96%,100%,100%,[0],[0],[0],[0],[1]
