In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin

# Root page of the site; the category sidebar is read from here and relative
# category links are resolved against it.
BASE_URL = "https://books.toscrape.com/"
# Category names to scrape; compared for exact equality with the stripped
# text of each sidebar link.
CATEGORIES = ["Fiction", "Travel"]
# HTTP headers sent with every request made to the site.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_category_urls(timeout=10):
    """Return the listing URL of every category named in ``CATEGORIES``.

    Downloads the page at ``BASE_URL`` and walks the nested links of the
    ``.side_categories`` sidebar, keeping those whose stripped text is one
    of the names in ``CATEGORIES``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping category name to its absolute URL. Names from
        ``CATEGORIES`` that are not found in the sidebar are simply absent.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout requests can wait on an unresponsive server forever.
    response = requests.get(BASE_URL, headers=HEADERS, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    # Hand over raw bytes so BeautifulSoup detects the document's encoding
    # itself rather than relying on the header-based guess behind `.text`.
    soup = BeautifulSoup(response.content, 'html.parser')

    wanted = set(CATEGORIES)
    category_links = {}
    for a in soup.select('.side_categories ul li ul li a'):
        name = a.text.strip()
        if name in wanted:
            # hrefs in the sidebar are resolved to absolute URLs
            category_links[name] = urljoin(BASE_URL, a['href'])
    return category_links

def scrape_category(name, url, timeout=10):
    """Collect every book listed in a category, following pagination.

    Starting at ``url``, each page is downloaded and every
    ``article.product_pod`` element is turned into one record. The loop
    continues through the ``li.next a`` link until a page has none.

    Args:
        name: Category label stored in each record under ``"Categoría"``.
        url: URL of the first page of the category listing.
        timeout: Seconds to wait for the server on each page request.

    Returns:
        A list of dicts with the keys ``"Categoría"``, ``"Título"``,
        ``"Precio"`` and ``"Disponibilidad"``; price and availability are
        the stripped text of the corresponding elements.

    Raises:
        requests.RequestException: If a page request fails, times out, or
            returns an HTTP error status.
    """
    books = []
    while url:
        # Without a timeout a stalled page would hang the whole scrape.
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        # Stop on HTTP errors instead of silently treating an error page
        # as "no books and no next page".
        response.raise_for_status()
        # Feed raw bytes so BeautifulSoup detects the document's own
        # encoding; `.text` uses a header-based guess that can garble
        # non-ASCII characters such as the currency symbol in the price.
        soup = BeautifulSoup(response.content, 'html.parser')

        for article in soup.select('article.product_pod'):
            books.append({
                "Categoría": name,
                # The full title is read from the link's `title` attribute.
                "Título": article.h3.a['title'],
                "Precio": article.select_one('.price_color').text.strip(),
                "Disponibilidad": article.select_one('.availability').text.strip(),
            })

        # The "next" href is resolved against the current page URL.
        next_page = soup.select_one('li.next a')
        url = urljoin(url, next_page['href']) if next_page else None
    return books

def main():
    """Scrape the configured categories and export all books to Excel.

    Looks up the category URLs, scrapes each category in turn while
    printing a progress line, and writes the combined records to
    ``libros_fiction_travel.xlsx`` without the index column.
    """
    collected = []
    for category, url in get_category_urls().items():
        print(f"Scrapeando categoría: {category}")
        collected += scrape_category(category, url)

    # One row per scraped book record
    frame = pd.DataFrame(collected)
    frame.to_excel("libros_fiction_travel.xlsx", index=False)
    print("Archivo 'libros_fiction_travel.xlsx' creado correctamente.")

# Run the scraper only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()

In [None]:
import requests
import pandas as pd

# Ask for JSON and send a browser-like User-Agent with the request.
headers = {
    "Accept": "application/json",
    "User-Agent": "Mozilla/5.0"
}

# Base endpoint
url = "https://tienda.mercadona.es/api/categories/"

# Category ID (example: 318 = dairy)
CATEGORY_ID = 318

# A timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors before the body is decoded, so a
# failed request is not mistaken for a malformed payload.
response = requests.get(f"{url}{CATEGORY_ID}", headers=headers, timeout=10)
response.raise_for_status()
data = response.json()

# One row per product. A missing 'reference_price' becomes an empty string;
# the other fields are required and raise KeyError when absent.
# NOTE(review): assumes the payload has a top-level 'products' list — confirm
# against the actual API response.
productos = [
    {
        'Nombre': product['display_name'],
        'Precio actual': product['price_instructions']['unit_price'],
        'Precio original': product['price_instructions'].get('reference_price', ''),
        'Unidad': product['price_instructions']['unit_size'],
    }
    for product in data['products']
]

# Save to Excel
df = pd.DataFrame(productos)
df.to_excel("productos_mercadona.xlsx", index=False)

print("Archivo 'productos_mercadona.xlsx' generado correctamente.")

In [None]:
import requests
import pandas as pd

# Ask for JSON and send a browser-like User-Agent with the request.
headers = {
    "Accept": "application/json",
    "User-Agent": "Mozilla/5.0"
}

# Category endpoint; the category ID is appended to it.
url = "https://tienda.mercadona.es/api/categories/"
CATEGORY_ID = 318

try:
    # A timeout keeps the script from hanging on an unresponsive server.
    response = requests.get(f"{url}{CATEGORY_ID}", headers=headers, timeout=10)
    response.raise_for_status()
    data = response.json()
except (requests.RequestException, ValueError) as e:
    # RequestException covers connection, timeout and HTTP-status errors;
    # ValueError covers a body that is not valid JSON.
    print("Error al obtener datos de la API:", e)
    # Fall through with an empty payload so the checks below report it.
    data = {}

productos = []

if 'products' in data:
    for product in data['products']:
        try:
            # Every field is optional here; missing ones become ''.
            price_info = product.get('price_instructions', {})
            productos.append({
                'Nombre': product.get('display_name', ''),
                'Precio actual': price_info.get('unit_price', ''),
                'Precio original': price_info.get('reference_price', ''),
                'Unidad': price_info.get('unit_size', '')
            })
        except (AttributeError, TypeError) as e:
            # Raised when an entry (or its price block) is not dict-like;
            # skip that entry and keep processing the rest.
            print("Error procesando un producto:", e)
else:
    print("No se encontraron productos en la respuesta de la API.")

# Only write the workbook when at least one product was collected.
if productos:
    df = pd.DataFrame(productos)
    df.to_excel("productos_mercadona.xlsx", index=False)
    print("Archivo 'productos_mercadona.xlsx' generado correctamente.")
else:
    print("No se generó el archivo porque no hay productos.")