In [4]:
import random
from time import sleep

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from tqdm import tqdm

#### 1. Load the functions **get_Data_On_Page** and **get_Data_Detail_1Product** from **Main_function.py**

In [5]:
from Main_function import get_Data_On_Page, get_Data_Detail_1Product

# Configure and launch the Chrome session that the crawl cells below reuse
path = 'chromedriver.exe'          # location of the chromedriver executable

chrome_options = Options()
# Startup switches passed to Chrome, applied in this order
for flag in ('--no-sandbox', '--disable-notifications', '--disable-infobars'):
    chrome_options.add_argument(flag)

service = Service(executable_path=path)
driver = webdriver.Chrome(options=chrome_options, service=service)

#### 2. Crawl data

##### Crawl the features shown on the search-result (listing) pages

In [6]:
# One entry per crawled search-result page; combined into a single table afterwards
all_data = []

# Lazada search URL for the keyword "tivi"; only the page number changes between requests
search_url = 'https://www.lazada.vn/catalog/?_keyori=ss&from=search_history&page={page}&q=tivi&spm=a2o4n.home-vn.search.6.4d423bdcueaQbu&sugg=tivi_4_1'

# Crawl the 5 result pages 16-20 (the upper bound of range is exclusive),
# handing each loaded page to get_Data_On_Page
for page in tqdm(range(16, 21), desc="Crawling pages"):
    main_link = search_url.format(page=page)
    driver.get(main_link)
    # Random 2-5 s pause to mimic human behavior and avoid getting blocked
    sleep(random.uniform(2, 5))

    tivi = get_Data_On_Page(driver)   # presumably a DataFrame (it is fed to pd.concat later) - confirm in Main_function.py
    all_data.append(tivi)

Crawling pages: 100%|██████████| 5/5 [03:07<00:00, 37.54s/it]


In [7]:
# Stack the per-page results into one table, discarding each page's own row labels
data_16_20 = pd.concat(all_data, ignore_index=True)
# Relabel the rows 1..N instead of the default 0..N-1
data_16_20.index = np.arange(1, data_16_20.shape[0] + 1)
data_16_20

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location
1,tivi,Google Tivi Xiaomi 4K A Pro 43 inch L43M8-A2SE...,https://www.lazada.vn/products/google-tivi-xia...,5.850.000 ₫,6% Off,11 Đã bán,(5),Hà Nội
2,tivi,Smart Tivi Coocaa Full HD 43 Inch 43S3U,https://www.lazada.vn/products/smart-tivi-cooc...,3.990.000 ₫,50% Off,,,Hà Nội
3,tivi,[6.6 HOTDEAL 3790K - Tặng Giá Treo TV DP243 ch...,https://www.lazada.vn/products/66-hotdeal-3790...,4.290.000 ₫,7% Off,52 Đã bán,(13),Hồ Chí Minh
4,tivi,Google Tivi Xiaomi A Pro 4K 55 Inch L55M8-A2SE...,https://www.lazada.vn/products/google-tivi-xia...,8.363.000 ₫,40% Off,33 Đã bán,(14),Hồ Chí Minh
5,tivi,Android Tivi QLED Casper 4K 55 inch 55QG8000,https://www.lazada.vn/products/android-tivi-ql...,7.490.000 ₫,49% Off,10 Đã bán,(5),Hà Nội
...,...,...,...,...,...,...,...,...
196,tivi,Tivi Qled TCL 65C645 65 inch 4K-Ultra HD Googl...,https://www.lazada.vn/products/tivi-qled-tcl-6...,13.990.000 ₫,26% Off,,,Hà Nội
197,tivi,[Trả góp 0%]Smart TV Panasonic HD 32 inches TH...,https://www.lazada.vn/products/tra-gop-0smart-...,7.000.000 ₫,3% Off,,(1),Bình Dương
198,tivi,[6.6 HOTDEAL 14490K ] Google Tivi TCL 4K 65inc...,https://www.lazada.vn/products/66-hotdeal-1449...,14.490.000 ₫,,,,Hồ Chí Minh
199,tivi,Tivi cầm tay CASIO TV-880 cho mọi người sưu tầm,https://www.lazada.vn/products/tivi-cam-tay-ca...,600.000 ₫,,,,Hải Dương


##### Crawl the detailed features of each product

In [8]:
# Per-product detail fields. Every list receives exactly one entry per row of
# data_16_20, in row order, so the lists can later be attached as columns.
detail_products = {
    "Price_original": [],
    "Ship_price": [],
    "Return": [],
    "Sale_rating": [],
    "Ship_on_time": [],
    "Chat_response": [],
    "One_star": [],
    "Two_star": [],
    "Three_star": [],
    "Four_star": [],
    "Five_star": []
}

# (link, error) pairs for product pages that could not be scraped,
# kept so they can be inspected or retried afterwards
failed_links = []

# Visit every product link; the progress bar tracks the crawl
for link in tqdm(data_16_20.Link, desc = "Crawling product details"):
    try:
        details = get_Data_Detail_1Product(driver, link)
    except WebDriverException as exc:
        # A single broken page must not abort the whole crawl and throw away
        # everything collected so far: record the failure and pad this row
        # with None so the lists stay aligned with data_16_20.
        failed_links.append((link, repr(exc)))
        details = dict.fromkeys(detail_products)
    for key in detail_products:
        detail_products[key].append(details[key])

# Make skipped products visible instead of silently leaving gaps
if failed_links:
    print(f"{len(failed_links)} product page(s) failed and were filled with None; see failed_links")

Crawling product details: 100%|██████████| 200/200 [1:39:07<00:00, 29.74s/it]


In [9]:
# Attach each scraped detail list to the listing table as a new column
for column_name in detail_products:
    data_16_20[column_name] = detail_products[column_name]

# Write the merged table to CSV (row labels are not written), then display it
data_16_20.to_csv("Page_16_20.csv", index=False)
data_16_20

Unnamed: 0,Type,Title,Link,Price_sale,Sale_off,Total_sold,Preview,Location,Price_original,Ship_price,Return,Sale_rating,Ship_on_time,Chat_response,One_star,Two_star,Three_star,Four_star,Five_star
1,tivi,Google Tivi Xiaomi 4K A Pro 43 inch L43M8-A2SE...,https://www.lazada.vn/products/google-tivi-xia...,5.850.000 ₫,6% Off,11 Đã bán,(5),Hà Nội,6.250.000 ₫,125.300 ₫,,100%,40%,Không đủ thông tin,[0],[0],[0],[0],[6]
2,tivi,Smart Tivi Coocaa Full HD 43 Inch 43S3U,https://www.lazada.vn/products/smart-tivi-cooc...,3.990.000 ₫,50% Off,,,Hà Nội,7.990.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
3,tivi,[6.6 HOTDEAL 3790K - Tặng Giá Treo TV DP243 ch...,https://www.lazada.vn/products/66-hotdeal-3790...,4.290.000 ₫,7% Off,52 Đã bán,(13),Hồ Chí Minh,4.590.000 ₫,Miễn phí,30 Ngày Trả Hàng Miễn Phí,96%,100%,100%,[0],[0],[0],[1],[13]
4,tivi,Google Tivi Xiaomi A Pro 4K 55 Inch L55M8-A2SE...,https://www.lazada.vn/products/google-tivi-xia...,8.363.000 ₫,40% Off,33 Đã bán,(14),Hồ Chí Minh,14.000.000 ₫,,,98%,100%,99%,[0],[0],[0],[1],[13]
5,tivi,Android Tivi QLED Casper 4K 55 inch 55QG8000,https://www.lazada.vn/products/android-tivi-ql...,7.490.000 ₫,49% Off,10 Đã bán,(5),Hà Nội,14.790.000 ₫,,,99%,Không đủ thông tin,100%,[0],[0],[0],[0],[5]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,tivi,Tivi Qled TCL 65C645 65 inch 4K-Ultra HD Googl...,https://www.lazada.vn/products/tivi-qled-tcl-6...,13.990.000 ₫,26% Off,,,Hà Nội,18.990.000 ₫,,30 Ngày Trả Hàng Miễn Phí,Nhà bán hàng mới,Không đủ thông tin,100%,[0],[0],[0],[0],[0]
197,tivi,[Trả góp 0%]Smart TV Panasonic HD 32 inches TH...,https://www.lazada.vn/products/tra-gop-0smart-...,7.000.000 ₫,3% Off,,(1),Bình Dương,7.200.000 ₫,,30 Ngày Trả Hàng Miễn Phí,97%,98%,100%,[0],[0],[0],[0],[1]
198,tivi,[6.6 HOTDEAL 14490K ] Google Tivi TCL 4K 65inc...,https://www.lazada.vn/products/66-hotdeal-1449...,14.490.000 ₫,,,,Hồ Chí Minh,,Miễn phí,30 Ngày Trả Hàng Miễn Phí,96%,100%,100%,[0],[0],[0],[0],[0]
199,tivi,Tivi cầm tay CASIO TV-880 cho mọi người sưu tầm,https://www.lazada.vn/products/tivi-cam-tay-ca...,600.000 ₫,,,,Hải Dương,,43.900 ₫,,83%,100%,100%,[0],[0],[0],[0],[0]
