# In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import csv
import re

# Matches a decimal amount such as "51.77" or "1,234.50". Requiring a leading
# digit keeps stray punctuation (e.g. a lone ".") from ever reaching float().
_PRICE_RE = re.compile(r'\d+(?:,\d{3})*(?:\.\d+)?')


def _parse_price(price_text, default=0.0):
    """Return the first decimal amount found in *price_text* as a float.

    Currency symbols and any other surrounding characters are ignored, and
    thousands separators (",") are removed before conversion.

    Args:
        price_text: Raw text of a price element, e.g. "£51.77". ``None`` is
            treated like an empty string.
        default: Value returned when no number is present.

    Returns:
        The parsed amount, or *default* when *price_text* contains no number.
    """
    match = _PRICE_RE.search(price_text or '')
    if match is None:
        return default
    return float(match.group().replace(',', ''))


def scrape_books(url="https://books.toscrape.com/", limit=10):
    """Scrape book titles and prices from a books.toscrape.com listing page.

    Args:
        url: Listing page to fetch. Defaults to the site's front page.
        limit: Maximum number of books to return; ``None`` returns every
            book found on the page. Defaults to 10.

    Returns:
        A list of dicts with the keys ``'title'`` (str), ``'price_gbp'``
        (float; 0.0 when the price is missing or unparseable) and
        ``'scraped_at'`` (ISO-8601 local timestamp). An empty list is
        returned when the page cannot be fetched; the error is printed
        rather than raised.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    # Only the network round-trip is guarded here, so that parsing problems
    # are not mistaken for (and reported as) download failures.
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error during scraping: {e}")
        return []

    # Passing raw bytes lets BeautifulSoup detect the encoding from the
    # document itself instead of relying on the HTTP headers.
    soup = BeautifulSoup(response.content, 'html.parser')
    books = soup.find_all('article', class_='product_pod')[:limit]

    book_data = []
    for book in books:
        # A malformed entry is skipped on its own; it must not discard the
        # books that were already parsed successfully.
        try:
            title = book.h3.a['title']
        except (AttributeError, KeyError, TypeError):
            print("Skipping a product entry without a title")
            continue

        price_element = book.find('p', class_='price_color')
        price_text = price_element.get_text() if price_element is not None else ''

        book_data.append({
            'title': title,
            'price_gbp': _parse_price(price_text),
            'scraped_at': datetime.now().isoformat()
        })

    return book_data

# The rest of the functions remain the same as in the first solution