In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

In [8]:
# Browser-like User-Agent header sent along with the HTTP request
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    )
}
# Collection page that the scraper targets
url = "https://www.oishiiplanet.it/collections/alimentari"

In [9]:
# One empty accumulator list per output column (name, price, country, review count)
product_names, prices, country_labels, num_reviews = [], [], [], []

In [10]:
# Fetch the collection page with a single HTTP request, extract one row per
# product card from the static HTML, and save the rows to snack_products.csv.
def _text_or_default(element, default="N/A"):
    """Return the stripped text of a BeautifulSoup element, or `default` when it is None."""
    return element.get_text(strip=True) if element is not None else default


# Start from empty lists so that re-running this cell does not append duplicate rows
product_names = []
prices = []
country_labels = []
num_reviews = []

try:
    # A timeout keeps the cell from hanging indefinitely on a stalled connection;
    # requests raises a RequestException subclass on expiry, handled below.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # Raise for 4xx/5xx responses
    soup = BeautifulSoup(response.text, "html.parser")

    # Locate the product listing section on the webpage
    product_elements = soup.find_all("div", class_="product-item")  # Adjust based on the actual class name

    # All parsing below works on the already-downloaded HTML, so no per-item
    # delay is needed: no further requests are made inside the loop.
    for product in product_elements:
        # Name and price fall back to "N/A" instead of raising AttributeError
        # when a card lacks the expected element.
        product_names.append(_text_or_default(product.find("h2", class_="product-title")))
        prices.append(_text_or_default(product.find("span", class_="product-price")))

        # Country label (if present)
        country_labels.append(_text_or_default(product.find("span", class_="country-label")))

        # Number of reviews (if present, otherwise "0")
        num_reviews.append(_text_or_default(product.find("span", class_="review-count"), default="0"))

    if not product_elements:
        # Make an empty result visible rather than silently writing a header-only file
        print("Warning: no 'product-item' elements found; the CSV will contain only the header row")

    # Create a DataFrame and save data
    data = pd.DataFrame({
        "Product Name": product_names,
        "Price": prices,
        "Country Label": country_labels,
        "Number of Reviews": num_reviews
    })

    # Save data to a CSV file
    data.to_csv("snack_products.csv", index=False)
    print("Data saved to snack_products.csv")

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")


Data saved to snack_products.csv


In [11]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time

# Render the collection page in a real browser via Selenium, parse the rendered
# HTML with BeautifulSoup, and save one row per product title to snack_products.csv.

# Set up the Selenium WebDriver
# Make sure to download the correct driver (e.g., ChromeDriver, GeckoDriver) and add it to your PATH
driver = webdriver.Chrome()  # or webdriver.Firefox() based on your browser

url = "https://www.oishiiplanet.it/collections/alimentari"

try:
    # Navigate to the webpage
    driver.get(url)

    # Give time for JavaScript to load the content
    time.sleep(5)  # Adjust based on loading time

    # Capture the rendered HTML while the browser session is still open
    page_html = driver.page_source
finally:
    # Always close the browser, even if navigation or the wait fails,
    # so no driver/browser process is left running.
    driver.quit()

# Parse the page content with BeautifulSoup
soup = BeautifulSoup(page_html, "html.parser")

# Lists to hold data
product_names = []
prices = []
country_labels = []
num_reviews = []  # May not be available on this site; adjust accordingly if not

# Locate the product listing section
product_elements = soup.find_all("div", class_="grid-product__title")  # Adjust class name based on inspection

# Loop through each product title element and extract details
for product in product_elements:
    # Product name: prefer the nested link text, fall back to the element's own
    # text so a title without an <a> child does not raise AttributeError
    link = product.find("a")
    name = (link if link is not None else product).get_text(strip=True)
    product_names.append(name)

    # Price: find_next searches forward through the document from this element.
    # NOTE(review): if a product has no price span, this may pick up the next
    # product's price - confirm against the page structure.
    price_tag = product.find_next("span", class_="money")
    prices.append(price_tag.get_text(strip=True) if price_tag is not None else "N/A")

    # Country label (if present)
    country = "Unknown"  # Replace with actual logic if country label is present
    country_labels.append(country)

    # Reviews (if present, otherwise set to 0 or 'N/A')
    num_reviews.append("N/A")

# Create a DataFrame and save data
data = pd.DataFrame({
    "Product Name": product_names,
    "Price": prices,
    "Country Label": country_labels,
    "Number of Reviews": num_reviews
})

# Save data to a CSV file
data.to_csv("snack_products.csv", index=False)
print("Data saved to snack_products.csv")


Data saved to snack_products.csv
