In [1]:

import time as tm
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Function to find product links
def find_link(driver, first_page=1, last_page=11, delay=3):
    """Collect product-page URLs from the paginated used-goods listing.

    Args:
        driver: Selenium WebDriver used to load each listing page.
        first_page: Number of the first listing page to visit (inclusive).
        last_page: Number of the last listing page to visit (inclusive).
        delay: Seconds to sleep after each navigation before reading the page.

    Returns:
        list[str]: Every non-empty ``href`` found on the visited pages, in
        page order. Duplicates are not removed.
    """
    base_url = 'https://clickbuy.com.vn/hang-cu?page='
    # <a> tags that wrap each product card in the listing grid
    lst_xpath = '//div[@class="list-products"]/div/a'
    all_links = []

    for page in range(first_page, last_page + 1):
        driver.get(f'{base_url}{page}')
        tm.sleep(delay)  # Fixed pause intended to let the page finish loading

        for tag in driver.find_elements(By.XPATH, lst_xpath):
            # Query the attribute once per element instead of twice.
            href = tag.get_attribute('href')
            if href:
                all_links.append(href)
    return all_links

def save_to_csv(data, feature_names, file_name="products_Click_buy.csv"):
    """Write the scraped product records to a CSV file.

    Args:
        data: Sequence of per-product dicts (column name -> value).
        feature_names: Column names, in the order they should appear in the
            file. Keys absent from a record become empty cells; keys not
            listed here are left out of the file.
        file_name: Destination path. Defaults to the file name this script
            has always written to.

    The file is encoded as ``utf-8-sig`` (UTF-8 with a BOM).
    """
    df = pd.DataFrame(data, columns=feature_names)
    df.to_csv(file_name, index=False, encoding='utf-8-sig')

def prepare_data(data_raw):
    """Flatten one scraped product into a single column -> value dict.

    Args:
        data_raw: Indexable with five positions, in this order:
            product name, value for "Giá bán cũ", value for "Giá mới",
            list of specification details, list of specification titles.

    Returns:
        dict: The three fixed fields followed by one entry per
        (title, detail) pair. Pairing stops at the shorter of the two lists,
        and a repeated title keeps the last detail seen.
    """
    record = {
        "Tên sản phẩm": data_raw[0],
        "Giá bán cũ": data_raw[1],
        "Giá mới": data_raw[2],
    }
    # Titles become keys, details become values.
    record.update(zip(data_raw[4], data_raw[3]))
    return record
def save_link_to_file(new_tags, file_name='product_links_click_buy.pkl'):
    """Pickle the collected product links to disk.

    Args:
        new_tags: The object to store (the list of product URLs).
        file_name: Destination path. Defaults to the same file that
            ``load_file_link_to_file`` reads by default.
    """
    import pickle
    # Save to file
    with open(file_name, 'wb') as file:
        pickle.dump(new_tags, file)
def load_file_link_to_file(file_name = 'product_links_click_buy.pkl'):
    """Read back a pickled object (the saved product links) from disk.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        Whatever object was stored in the file.
    """
    import pickle
    # NOTE: unpickling can execute arbitrary code; only load files that
    # were produced by this project.
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)

# Seconds to wait for each product page; without a timeout requests.get can block forever.
REQUEST_TIMEOUT = 30


def _text_or_empty(soup, tag_name, css_class):
    """Return the stripped text of the first matching element, or '' if there is none."""
    node = soup.find(tag_name, class_=css_class)
    return node.get_text(strip=True) if node is not None else ''


def _split_specifications(spec_node):
    """Split the specification block's text into parallel (titles, details) lists.

    Non-blank text lines are taken alternately as title, detail, title, ...
    A trailing title with no following line gets '' as its detail.
    Assumes each title is followed by exactly one line of detail text —
    TODO confirm against the page markup.
    """
    lines = [line.strip() for line in spec_node.get_text(separator='\n').strip().split('\n') if line.strip()]
    titles = lines[0::2]
    details = lines[1::2]
    if len(details) < len(titles):
        details.append('')
    return titles, details


if __name__ == "__main__":
    # The Selenium crawl below regenerates the link list; it is disabled
    # because the links are read from the pickle written by an earlier run.
    # # Set Chrome options
    # chrome_options = webdriver.ChromeOptions()
    # chrome_options.add_argument('--headless=new')  # Running Chrome in headless mode
    # chrome_options.add_argument('--no-sandbox')    # Necessary for running in some environments

    # # Automatically download and use the correct version of chromedriver
    # service = Service(ChromeDriverManager().install())

    # # Initialize the WebDriver with the service and options
    # driver = webdriver.Chrome(service=service, options=chrome_options)

    # Find product links
    # new_tags = find_link(driver)
    new_tags = load_file_link_to_file()

    # `data` and `feature_names` are kept at module level on purpose: a later
    # notebook cell reads `feature_names`.
    data = []
    feature_names = ["Tên sản phẩm", "Giá bán cũ", "Giá mới"]

    for count, tag_url in enumerate(new_tags, 1):
        # if count > 10:   # uncomment to limit the run while debugging
        #     break
        print(f'Liên kết: {count}')
        try:
            response = requests.get(tag_url, timeout=REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(f"Lỗi: {response.status_code} - {response.reason}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            # Find the div containing product specifications
            specifications = soup.find('div', class_='product-specification__content')
            if not specifications:
                print("Không tìm thấy thẻ chứa thông số kỹ thuật.")
                continue

            titles, details = _split_specifications(specifications)

            # A missing element yields '' instead of raising, so a product
            # without e.g. a struck-through price is still recorded.
            price_old = _text_or_empty(soup, 'p', 'price-old')
            price_new = _text_or_empty(soup, 'p', 'price')
            title_item = _text_or_empty(soup, 'h1', 'product-name')

            # NOTE(review): the "Giá cũ" label / "Giá bán cũ" column receives the
            # text of <p class="price"> and "Giá mới" receives <p class="price-old">.
            # This mapping is kept exactly as it was — confirm it is intended.
            print("Tiêu đề sản phẩm:", title_item)
            print("Giá cũ:", price_new)
            print("Giá mới:", price_old)
            print("Tiêu đề cấu hình:", titles)
            print("Chi tiết:", details)
            print("Link:", tag_url)

            data_raw = [title_item, price_new, price_old, details, titles]
            data_done = prepare_data(data_raw)

            # Grow the CSV header with any specification title not seen before.
            for title in titles:
                if title not in feature_names:
                    feature_names.append(title)
            data.append(data_done)
            print("******************************************************************************")

        except Exception as e:
            # Best effort: report the failure and carry on with the next link.
            print(f"Lỗi khi lấy thông tin từ {tag_url}: {str(e)}")

    save_to_csv(data, feature_names)


Liên kết: 1
Tiêu đề sản phẩm: iPhone 15 Pro Max 256GB cũ đẹp 99%
Giá cũ: 25.490.000 ₫
Giá mới: 29.990.000 ₫
Tiêu đề cấu hình: []
Chi tiết: []
Link: https://clickbuy.com.vn/iphone-15-pro-max-cu
******************************************************************************
Liên kết: 2
Tiêu đề sản phẩm: iPhone 15 Pro 128GB cũ đẹp 99%
Giá cũ: 20.190.000 ₫
Giá mới: 26.990.000 ₫
Tiêu đề cấu hình: []
Chi tiết: []
Link: https://clickbuy.com.vn/iphone-15-pro-cu
******************************************************************************
Liên kết: 3
Tiêu đề sản phẩm: iPhone 15 Plus 128GB Cũ đẹp 99%
Giá cũ: 18.290.000 ₫
Giá mới: 23.990.000 ₫
Tiêu đề cấu hình: []
Chi tiết: []
Link: https://clickbuy.com.vn/iphone-15-plus-cu.html
******************************************************************************
Liên kết: 4
Tiêu đề sản phẩm: iPhone 14 Pro Max 128GB cũ đẹp 99%
Giá cũ: 21.290.000 ₫
Giá mới: 33.990.000 ₫
Tiêu đề cấu hình: ['Kích thước màn hình', 'CPU', 'Hệ điều hành', 'Bộ nhớ trong', 'RAM

In [2]:
# Number of CSV columns: the three fixed fields plus every distinct
# specification title appended during scraping. Requires the scraping cell
# above to have been run in this kernel, since it defines `feature_names`.
print(len(feature_names))

47
