# In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape Wikipedia's "List of best-selling books" tables into a DataFrame
# (df_wiki) and save the result as wikipedia_books.csv.

# URL of Wikipedia's List of Best-Selling Books page
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_best-selling_books"

# Wikimedia's User-Agent policy asks automated clients to identify
# themselves; requests sent with a generic library agent may be rejected.
REQUEST_HEADERS = {
    "User-Agent": "best-selling-books-scraper/1.0 (educational notebook)",
}

# Upper bound (seconds) on connecting/reading so a stalled connection
# cannot hang the notebook cell indefinitely.
REQUEST_TIMEOUT = 30

# Only rows with at least this many <td> cells are treated as book rows:
# title, author, language, year, sales, genre.
MIN_COLUMNS = 6


def _cell_text(cell):
    """Return the stripped text of a table cell, without footnote markers.

    Text nested inside a ``<sup>`` element (where Wikipedia places citation
    markers such as ``[12]``) is skipped entirely, so "Title[12]" yields
    "Title" rather than "Title12". The parsed tree is not modified.
    """
    visible_parts = [
        piece for piece in cell.strings if piece.find_parent("sup") is None
    ]
    return "".join(visible_parts).strip()


# Send request and parse the page
response = requests.get(
    WIKI_URL,
    headers=REQUEST_HEADERS,
    timeout=REQUEST_TIMEOUT,
)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# writing an empty CSV.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find all tables containing book data
tables = soup.find_all("table", class_="wikitable")

# Lists to store data
titles = []
authors = []
original_languages = []
years = []
approx_sales = []
genres = []

# Extract data from all tables
for table in tables:
    for row in table.find_all("tr")[1:]:  # Skip header row
        columns = row.find_all("td")
        if len(columns) < MIN_COLUMNS:
            # Not a full data row (e.g. a header or spanning row).
            continue

        title, author, original_language, year, sales, genre = (
            _cell_text(cell) for cell in columns[:MIN_COLUMNS]
        )

        titles.append(title)
        authors.append(author)
        original_languages.append(original_language)
        years.append(year)
        approx_sales.append(sales)
        genres.append(genre)

# Create a DataFrame
df_wiki = pd.DataFrame({
    "Title": titles,
    "Author": authors,
    "Original Language": original_languages,
    "First Published": years,
    "Approximate Sales": approx_sales,
    "Genre": genres,
})

# Save the data
df_wiki.to_csv("wikipedia_books.csv", index=False)

print("Wikipedia scraping complete! Data saved as wikipedia_books.csv")

# Output:
# Wikipedia scraping complete! Data saved as wikipedia_books.csv
