In [1]:
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from time import sleep
import random
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from tqdm import tqdm

#### 1. Load the functions **get_Data_On_Page** and **get_Data_Detail_1Product** from **Main_function.py**

In [2]:
from Main_function import get_Data_On_Page, get_Data_Detail_1Product

# Chrome session shared by every crawl step in this notebook.
# `path` is a relative path to the ChromeDriver executable.
path = 'chromedriver.exe'

# Launch flags handed to Chrome, added in this order.
chrome_options = Options()
for chrome_flag in ('--no-sandbox', '--disable-notifications', '--disable-infobars'):
    chrome_options.add_argument(chrome_flag)

# Start the browser through an explicit driver service.
service = Service(executable_path=path)
driver = webdriver.Chrome(options=chrome_options, service=service)

#### 2. Crawl data

##### Crawl the features shown on the search-results (listing) pages

In [3]:
# Collects whatever get_Data_On_Page returns for each listing page.
all_data = []

# Walk through search-result pages 2 to 5 (four pages) of the "dien thoai" query,
# sorted by descending price, and scrape each page with get_Data_On_Page.
for page in tqdm(range(2, 6), desc="Crawling pages"):
    main_link = f'https://www.lazada.vn/catalog/?page={page}&q=dien%20thoai&sort=pricedesc'
    driver.get(main_link)

    # Wait a random 2-5 seconds after loading, to mimic human browsing
    # and reduce the chance of getting blocked.
    pause_seconds = random.uniform(2, 5)
    sleep(pause_seconds)

    all_data.append(get_Data_On_Page(driver))

Crawling pages: 100%|██████████| 4/4 [02:09<00:00, 32.42s/it]


In [9]:
# Stack the per-page results into a single DataFrame with a fresh 0..n-1 index,
# then shift the row labels so that they start at 1 instead of 0.
data_02_05 = pd.concat(objs=all_data, axis=0, ignore_index=True)
data_02_05.index = np.arange(len(data_02_05)) + 1
data_02_05

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location
1,dien thoai,Điện thoại OPPO FIND N3 (16GB/512GB) - Hàng ch...,https://www.lazada.vn/products/dien-thoai-oppo...,41.990.000 ₫,7% Off,11 Đã bán,,Hồ Chí Minh
2,dien thoai,Điện thoại Samsung Galaxy S24 Ultra 12GB/1TB -...,https://www.lazada.vn/products/dien-thoai-sams...,37.590.001 ₫,16% Off,24 Đã bán,(2),Hồ Chí Minh
3,dien thoai,[GIÁ SỐC CHỈ 6.6 - VOUCHER 7TR] [TẶNG Buds2 Pr...,https://www.lazada.vn/products/gia-soc-chi-66-...,31.989.000 ₫,6% Off,1.3K Đã bán,(305),Hồ Chí Minh
4,dien thoai,[6.6 SALE TO] iPhone 15 Plus - Hàng Chính Hãng...,https://www.lazada.vn/products/66-sale-to-ipho...,31.990.000 ₫,9% Off,700 Đã bán,(145),Bắc Ninh
5,dien thoai,Điện thoại Samsung Galaxy S24 Ultra 12GB/512GB...,https://www.lazada.vn/products/dien-thoai-sams...,31.590.000 ₫,16% Off,21 Đã bán,(2),Hồ Chí Minh
...,...,...,...,...,...,...,...,...
156,dien thoai,Điện thoại di động Redmi 13C 6GB/128GB - Chính...,https://www.lazada.vn/products/dien-thoai-di-d...,3.090.000 ₫,11% Off,207 Đã bán,(115),Hồ Chí Minh
157,dien thoai,Điện thoại Redmi 13C (6GB/128GB) - Chính hãng,https://www.lazada.vn/products/dien-thoai-redm...,3.090.000 ₫,38% Off,654 Đã bán,(261),Hồ Chí Minh
158,dien thoai,Điện thoại Xiaomi Redmi 13C 6GB/128GB - Hàng c...,https://www.lazada.vn/products/dien-thoai-xiao...,3.085.000 ₫,16% Off,10 Đã bán,(1),Hồ Chí Minh
159,dien thoai,Điện Thoại Xiaomi Redmi Note 12 5G 8GB/256GB -...,https://www.lazada.vn/products/dien-thoai-xiao...,3.050.000 ₫,46% Off,,,Hà Nội


##### Crawl the detailed features of each product

In [10]:
# Maps each detail column name to the list of values collected for it.
# Values are appended in the same order as the links in data_02_05.Link.
detail_products = {
    field: []
    for field in (
        "Price_original",
        "Ship_price",
        "Return",
        "Sale_rating",
        "Ship_on_time",
        "Chat_response",
        "One_star",
        "Two_star",
        "Three_star",
        "Four_star",
        "Five_star",
    )
}

# Visit every product link (tqdm shows progress) and store each field of the
# result under the column of the same name.
for link in tqdm(data_02_05.Link, desc = "Crawling product details"):
    details = get_Data_Detail_1Product(driver, link)
    for field, collected in detail_products.items():
        collected.append(details[field])

Crawling product details: 100%|██████████| 160/160 [1:17:02<00:00, 28.89s/it]


In [12]:
# Attach every list of crawled detail values to data_02_05 as a new column
# (data_02_05 is modified in place).
for column_name in detail_products:
    data_02_05[column_name] = detail_products[column_name]

# Save the enriched table as CSV; index=False keeps the row index out of the file.
data_02_05.to_csv("page_02_05.csv", index=False)
data_02_05

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location,Price_original,Ship_price,Return,Sale_rating,Ship_on_time,Chat_response,One_star,Two_star,Three_star,Four_star,Five_star
1,dien thoai,Điện thoại OPPO FIND N3 (16GB/512GB) - Hàng ch...,https://www.lazada.vn/products/dien-thoai-oppo...,41.990.000 ₫,7% Off,11 Đã bán,,Hồ Chí Minh,44.990.000 ₫,20.100 ₫,30 Ngày Trả Hàng Miễn Phí,99%,100%,75%,[0],[0],[0],[0],[0]
2,dien thoai,Điện thoại Samsung Galaxy S24 Ultra 12GB/1TB -...,https://www.lazada.vn/products/dien-thoai-sams...,37.590.001 ₫,16% Off,24 Đã bán,(2),Hồ Chí Minh,44.490.000 ₫,,30 Ngày Trả Hàng Miễn Phí,90%,Không đủ thông tin,100%,[0],[0],[0],[0],[2]
3,dien thoai,[GIÁ SỐC CHỈ 6.6 - VOUCHER 7TR] [TẶNG Buds2 Pr...,https://www.lazada.vn/products/gia-soc-chi-66-...,31.989.000 ₫,6% Off,1.3K Đã bán,(305),Hồ Chí Minh,33.990.000 ₫,16.500 ₫,30 Ngày Trả Hàng Miễn Phí,91%,99%,100%,[2],[0],[1],[0],[302]
4,dien thoai,[6.6 SALE TO] iPhone 15 Plus - Hàng Chính Hãng...,https://www.lazada.vn/products/66-sale-to-ipho...,31.990.000 ₫,9% Off,700 Đã bán,(145),Bắc Ninh,34.990.000 ₫,5.500 ₫,30 Ngày Trả Hàng Miễn Phí,94%,98%,100%,[1],[0],[1],[0],[143]
5,dien thoai,Điện thoại Samsung Galaxy S24 Ultra 12GB/512GB...,https://www.lazada.vn/products/dien-thoai-sams...,31.590.000 ₫,16% Off,21 Đã bán,(2),Hồ Chí Minh,37.490.000 ₫,,30 Ngày Trả Hàng Miễn Phí,90%,Không đủ thông tin,100%,[0],[0],[0],[0],[3]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,dien thoai,Điện thoại di động Redmi 13C 6GB/128GB - Chính...,https://www.lazada.vn/products/dien-thoai-di-d...,3.090.000 ₫,11% Off,207 Đã bán,(115),Hồ Chí Minh,3.490.000 ₫,18.300 ₫,,98%,100%,90%,[0],[0],[1],[0],[114]
157,dien thoai,Điện thoại Redmi 13C (6GB/128GB) - Chính hãng,https://www.lazada.vn/products/dien-thoai-redm...,3.090.000 ₫,38% Off,654 Đã bán,(261),Hồ Chí Minh,4.990.000 ₫,18.300 ₫,,98%,100%,90%,[1],[0],[2],[2],[256]
158,dien thoai,Điện thoại Xiaomi Redmi 13C 6GB/128GB - Hàng c...,https://www.lazada.vn/products/dien-thoai-xiao...,3.085.000 ₫,16% Off,10 Đã bán,(1),Hồ Chí Minh,3.690.000 ₫,18.300 ₫,30 Ngày Trả Hàng Miễn Phí,99%,99%,95%,[0],[0],[0],[0],[1]
159,dien thoai,Điện Thoại Xiaomi Redmi Note 12 5G 8GB/256GB -...,https://www.lazada.vn/products/dien-thoai-xiao...,3.050.000 ₫,46% Off,,,Hà Nội,5.690.000 ₫,Miễn phí,,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
