In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape venue cards (name, rating, description) from the first few result
# pages of a The Knot marketplace listing and save them to an Excel workbook.

# --- Configuration -----------------------------------------------------------
CHROMEDRIVER_PATH = (
    r"C:\Users\Nishant shah\OneDrive\Desktop\Nishant\Software\chromedriver.exe"
)
# URL of the web page to scrape
url = "https://www.theknot.com/marketplace/wedding-reception-venues-london-oh?sort=featured"
MAX_PAGES = 3  # Number of result pages to process
NEXT_PAGE_TIMEOUT_SECONDS = 10  # How long to wait for the pagination link
PAGE_LOAD_PAUSE_SECONDS = 3  # Fixed pause after clicking a pagination link
OUTPUT_FILE = "reception_venues.xlsx"

# --- Selectors ---------------------------------------------------------------
# A class_ string containing spaces is compared by BeautifulSoup against the
# exact value of the element's class attribute, so these must match the page
# markup verbatim (including class order).
VENUE_CARD_CLASS = "col-12--71dcc col-md-6--3db49 col-xxl-4--80f68 new-vendor-card-column--482e2 card-column--f29c5"
VENUE_NAME_CLASS = "vendor-name--a628b primaryBold--1abd6 body1--e44d4"
RATING_CLASS = "star-count--ae4ff"
DESCRIPTION_CLASS = "container--38c42 body2--f71c1"
PAGINATION_LINK_CLASS = "clickableArea--ae9e7"


def _extract_venue(card):
    """Return ``[name, rating, description]`` for one venue card.

    Rating and description fall back to ``"N/A"`` when their element is
    absent. A missing name element is not tolerated: accessing ``.text`` on
    the ``None`` returned by ``find`` raises ``AttributeError``, which the
    caller reports before moving on to the next card.
    """
    name_venue = card.find("div", class_=VENUE_NAME_CLASS).text.strip()

    # Look each optional element up once and reuse the result.
    rating_tag = card.find("span", class_=RATING_CLASS)
    if rating_tag is not None:
        rating = rating_tag.text.replace("Star", " Star").strip()
    else:
        rating = "N/A"

    description_tag = card.find("div", class_=DESCRIPTION_CLASS)
    if description_tag is not None:
        description = description_tag.text.strip()
    else:
        description = "N/A"

    return [name_venue, rating, description]


# Setup Selenium WebDriver
service = Service(CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service)

try:
    # Navigate inside the try block so the browser is closed even when the
    # initial page load fails.
    driver.get(url)

    Data = []  # List to hold extracted rows

    for page_count in range(1, MAX_PAGES + 1):
        print(f"Scraping Page {page_count}...")

        # Parse the page source with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")
        reception_venues = soup.find_all("div", class_=VENUE_CARD_CLASS)

        # Extract data from each venue card; a malformed card is reported and
        # skipped so it cannot abort the whole run.
        for venue in reception_venues:
            try:
                Data.append(_extract_venue(venue))
            except AttributeError as e:
                print(f"Error extracting venue data: {e}")

        # The last requested page has been scraped: do not navigate further.
        if page_count == MAX_PAGES:
            break

        # Target the link for the specific next page rather than the first
        # pagination link in the document.
        # NOTE(review): assumes pagination links carry an aria-label of the
        # form "Go to page <n>" - confirm against the live markup.
        next_page_xpath = (
            f"//a[@class='{PAGINATION_LINK_CLASS}' and "
            f"contains(@aria-label, 'Go to page {page_count + 1}')]"
        )
        try:
            next_page_button = WebDriverWait(
                driver, NEXT_PAGE_TIMEOUT_SECONDS
            ).until(EC.element_to_be_clickable((By.XPATH, next_page_xpath)))
            next_page_button.click()
            print("Clicked 'Next Page'. Waiting for content to load...")
            time.sleep(PAGE_LOAD_PAUSE_SECONDS)  # Allow content to load
        except Exception as e:
            # Deliberately broad: any failure to paginate (timeout, blocked
            # click, ...) ends the crawl but keeps the rows gathered so far.
            print("No 'Next Page' button found or an error occurred:", e)
            break

    # Convert extracted data into a DataFrame
    df = pd.DataFrame(Data, columns=["Venue Name", "Rating", "Description"])

    # Save data to an Excel file
    df.to_excel(OUTPUT_FILE, index=False, engine="openpyxl")
    print("Data saved to 'reception_venues.xlsx'.")

finally:
    # Close the browser
    driver.quit()

Scraping Page 1...
Clicked 'Next Page'. Waiting for content to load...
Scraping Page 2...
Clicked 'Next Page'. Waiting for content to load...
Scraping Page 3...
Clicked 'Next Page'. Waiting for content to load...
Data saved to 'reception_venues.xlsx'.
