In [1]:
# First cell: Install required libraries
# !pip install requests beautifulsoup4 pandas selenium webdriver-manager

# Second cell: Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time


ModuleNotFoundError: No module named 'webdriver_manager'

In [None]:

# Third cell: Function to construct the Tokopedia search URL based on the query
def construct_search_url(query):
    """Build the Tokopedia product-search URL for a search query.

    The query is percent-encoded for use as a query-string value: spaces
    become '+', and reserved characters such as '&', '#', '+' or '%' are
    escaped so they cannot terminate or corrupt the ``q`` parameter.

    Args:
        query: The search text, e.g. ``"gaming laptop"``.

    Returns:
        The full search URL as a string.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    base_url = "https://www.tokopedia.com/search?st=product&q="
    return base_url + quote_plus(query)

# Fourth cell: Scraping function using requests and BeautifulSoup
def scrape_tokopedia(query, timeout=10):
    """Scrape Tokopedia search results for a query using requests + BeautifulSoup.

    Args:
        query: The search text passed to ``construct_search_url``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        A ``pandas.DataFrame`` with columns 'Product Name', 'Price' and
        'Rating' (one row per product that could be parsed), or ``None`` if
        the page could not be retrieved (network error or non-200 status).
    """
    url = construct_search_url(query)

    # Treat network-level failures (DNS, connection, timeout) the same way as
    # a bad HTTP status: report and return None instead of raising.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page. Error: {exc}")
        return None

    # Check if the response is successful
    if response.status_code != 200:
        print(f"Failed to retrieve page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE: the CSS class names below are example placeholders; inspect
    # Tokopedia's page to confirm the real selectors.
    products = soup.find_all('div', class_='css-1sn1xa2')

    product_list = []

    for product in products:
        name_tag = product.find('div', class_='css-1b6t4dn')
        price_tag = product.find('div', class_='css-o5uqvq')

        # Skip containers that lack a name or a price element.
        if name_tag is None or price_tag is None:
            continue

        # Rating is optional; look it up once and fall back to a label.
        rating_tag = product.find('span', class_='css-t70v7i')
        rating = rating_tag.text if rating_tag is not None else 'No Rating'

        product_list.append({
            'Product Name': name_tag.text,
            'Price': price_tag.text,
            'Rating': rating,
        })

    # Create a DataFrame from the list of products
    return pd.DataFrame(product_list)

# Fifth cell: Function to scrape pages dynamically using Selenium (if needed)
def scrape_tokopedia_with_selenium(query):
    """Scrape Tokopedia search results from a page rendered by Selenium/Chrome.

    Args:
        query: The search text passed to ``construct_search_url``.

    Returns:
        A ``pandas.DataFrame`` with columns 'Product Name', 'Price' and
        'Rating', one row per product that could be parsed.
    """
    url = construct_search_url(query)

    # Initialize Selenium WebDriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get(url)

        # Fixed wait for the page to load (you might need to adjust this)
        time.sleep(5)

        page_source = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed,
        # so no Chrome/chromedriver process is left running.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # NOTE: the CSS class names below are example placeholders; inspect
    # Tokopedia's page to confirm the real selectors.
    products = soup.find_all('div', class_='css-1sn1xa2')

    product_list = []

    for product in products:
        name_tag = product.find('div', class_='css-1b6t4dn')
        price_tag = product.find('div', class_='css-o5uqvq')

        # Skip containers that lack a name or a price element.
        if name_tag is None or price_tag is None:
            continue

        # Rating is optional; look it up once and fall back to a label.
        rating_tag = product.find('span', class_='css-t70v7i')
        rating = rating_tag.text if rating_tag is not None else 'No Rating'

        product_list.append({
            'Product Name': name_tag.text,
            'Price': price_tag.text,
            'Rating': rating,
        })

    return pd.DataFrame(product_list)

# Sixth cell: Main execution
if __name__ == "__main__":
    import builtins

    search_query = "laptop"  # Example search query, can be changed
    df = scrape_tokopedia(search_query)

    # A None result means the request failed; an empty frame means no
    # product matched the selectors. Either way there is nothing to export.
    if df is not None and not df.empty:
        print("Scraped Products:")
        # `display` is only injected by IPython/Jupyter; fall back to print
        # so the script also runs from a plain Python interpreter.
        show = getattr(builtins, "display", print)
        show(df)
        df.to_csv('tokopedia_products.csv', index=False)  # Export to CSV
    else:
        print("No data scraped.")
